diff options
Diffstat (limited to 'tools')
116 files changed, 4490 insertions, 659 deletions
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 1e2ab148d5db..e1900abd44f6 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -149,6 +149,24 @@ FEATURE_DISPLAY ?= \ # FEATURE_GROUP_MEMBERS-libbfd = libbfd-liberty libbfd-liberty-z +# +# Declare list of feature dependency packages that provide pkg-config files. +# +FEATURE_PKG_CONFIG ?= \ + libtraceevent \ + libtracefs + +feature_pkg_config = $(eval $(feature_pkg_config_code)) +define feature_pkg_config_code + FEATURE_CHECK_CFLAGS-$(1) := $(shell $(PKG_CONFIG) --cflags $(1) 2>/dev/null) + FEATURE_CHECK_LDFLAGS-$(1) := $(shell $(PKG_CONFIG) --libs $(1) 2>/dev/null) +endef + +# Set FEATURE_CHECK_(C|LD)FLAGS-$(package) for packages using pkg-config. +ifneq ($(PKG_CONFIG),) + $(foreach package,$(FEATURE_PKG_CONFIG),$(call feature_pkg_config,$(package))) +endif + # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not # mentioned in this list we need to refactor this ;-). diff --git a/tools/include/asm/rwonce.h b/tools/include/asm/rwonce.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/include/asm/rwonce.h diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h new file mode 100644 index 000000000000..8a27bc5c7a7f --- /dev/null +++ b/tools/include/uapi/linux/fs.h @@ -0,0 +1,552 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FS_H +#define _UAPI_LINUX_FS_H + +/* + * This file has definitions for some important file table structures + * and constants and structures used by various generic file system + * ioctl's. Please do not make any changes in this file before + * sending patches for review to linux-fsdevel@vger.kernel.org and + * linux-api@vger.kernel.org. + */ + +#include <linux/limits.h> +#include <linux/ioctl.h> +#include <linux/types.h> +#ifndef __KERNEL__ +#include <linux/fscrypt.h> +#endif + +/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */ +#if !defined(__KERNEL__) +#include <linux/mount.h> +#endif + +/* + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. + */ + +/* Fixed constants first: */ +#undef NR_OPEN +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ + +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) + +#define SEEK_SET 0 /* seek relative to beginning of file */ +#define SEEK_CUR 1 /* seek relative to current file position */ +#define SEEK_END 2 /* seek relative to end of file */ +#define SEEK_DATA 3 /* seek to the next data */ +#define SEEK_HOLE 4 /* seek to the next hole */ +#define SEEK_MAX SEEK_HOLE + +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ + +struct file_clone_range { + __s64 src_fd; + __u64 src_offset; + __u64 src_length; + __u64 dest_offset; +}; + +struct fstrim_range { + __u64 start; + __u64 len; + __u64 minlen; +}; + +/* + * We include a length field because some filesystems (vfat) have an identifier + * that we do want to expose as a UUID, but doesn't have the standard length. + * + * We use a fixed size buffer beacuse this interface will, by fiat, never + * support "UUIDs" longer than 16 bytes; we don't want to force all downstream + * users to have to deal with that. + */ +struct fsuuid2 { + __u8 len; + __u8 uuid[16]; +}; + +struct fs_sysfs_path { + __u8 len; + __u8 name[128]; +}; + +/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ +#define FILE_DEDUPE_RANGE_SAME 0 +#define FILE_DEDUPE_RANGE_DIFFERS 1 + +/* from struct btrfs_ioctl_file_extent_same_info */ +struct file_dedupe_range_info { + __s64 dest_fd; /* in - destination file */ + __u64 dest_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file. */ + /* status of this dedupe operation: + * < 0 for error + * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds + * == FILE_DEDUPE_RANGE_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; /* must be zero */ +}; + +/* from struct btrfs_ioctl_file_extent_same_args */ +struct file_dedupe_range { + __u64 src_offset; /* in - start of extent in source */ + __u64 src_length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; /* must be zero */ + __u32 reserved2; /* must be zero */ + struct file_dedupe_range_info info[]; +}; + +/* And dynamically-tunable limits and defaults: */ +struct files_stat_struct { + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ +}; + +struct inodes_stat_t { + long nr_inodes; + long nr_unused; + long dummy[5]; /* padding for sysctl ABI compatibility */ +}; + + +#define NR_FILE 8192 /* this can well be larger on a larger system */ + +/* + * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. + */ +struct fsxattr { + __u32 fsx_xflags; /* xflags field value (get/set) */ + __u32 fsx_extsize; /* extsize field value (get/set)*/ + __u32 fsx_nextents; /* nextents field value (get) */ + __u32 fsx_projid; /* project identifier (get/set) */ + __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/ + unsigned char fsx_pad[8]; +}; + +/* + * Flags for the fsx_xflags field + */ +#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ +#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ +#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ +#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ +#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ +#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ +#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ +#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ +#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ +#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + +/* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. */ + +#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ +#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ +#define BLKRRPART _IO(0x12,95) /* re-read partition table */ +#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ +#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ +#define BLKRASET _IO(0x12,98) /* set read ahead for block device */ +#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ +#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ +#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ +#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ +#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ +#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ +#if 0 +#define BLKPG _IO(0x12,105)/* See blkpg.h */ + +/* Some people are morons. Do not use sizeof! */ + +#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ +#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ +/* This was here just to show that the number is taken - + probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ +#endif +/* A jump here: 108-111 have been used for various private purposes. */ +#define BLKBSZGET _IOR(0x12,112,size_t) +#define BLKBSZSET _IOW(0x12,113,size_t) +#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ +#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) +#define BLKTRACESTART _IO(0x12,116) +#define BLKTRACESTOP _IO(0x12,117) +#define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) +#define BLKDISCARDZEROES _IO(0x12,124) +#define BLKSECDISCARD _IO(0x12,125) +#define BLKROTATIONAL _IO(0x12,126) +#define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) +/* + * A jump here: 130-136 are reserved for zoned block devices + * (see uapi/linux/blkzoned.h) + */ + +#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ +#define FIBMAP _IO(0x00,1) /* bmap access */ +#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ +#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FICLONE _IOW(0x94, 9, int) +#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) +#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) + +#define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */ + +#define FS_IOC_GETFLAGS _IOR('f', 1, long) +#define FS_IOC_SETFLAGS _IOW('f', 2, long) +#define FS_IOC_GETVERSION _IOR('v', 1, long) +#define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) +#define FS_IOC32_GETFLAGS _IOR('f', 1, int) +#define FS_IOC32_SETFLAGS _IOW('f', 2, int) +#define FS_IOC32_GETVERSION _IOR('v', 1, int) +#define FS_IOC32_SETVERSION _IOW('v', 2, int) +#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) +#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) +#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX]) +#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) +/* Returns the external filesystem UUID, the same one blkid returns */ +#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2) +/* + * Returns the path component under /sys/fs/ that refers to this filesystem; + * also /sys/kernel/debug/ for filesystems with debugfs exports + */ +#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) + +/* + * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) + * + * Note: for historical reasons, these flags were originally used and + * defined for use by ext2/ext3, and then other file systems started + * using these flags so they wouldn't need to write their own version + * of chattr/lsattr (which was shipped as part of e2fsprogs). You + * should think twice before trying to use these flags in new + * contexts, or trying to assign these flags, since they are used both + * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are + * almost out of 32-bit flags. :-) + * + * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from + * XFS to the generic FS level interface. This uses a structure that + * has padding and hence has more room to grow, so it may be more + * appropriate for many new use cases. + * + * Please do not change these flags or interfaces before checking with + * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org. + */ +#define FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define FS_UNRM_FL 0x00000002 /* Undelete */ +#define FS_COMPR_FL 0x00000004 /* Compress file */ +#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define FS_DIRTY_FL 0x00000100 +#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ +/* End compression flags --- maybe not all used */ +#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */ +#define FS_BTREE_FL 0x00001000 /* btree format dir */ +#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ +#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ +#define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_VERITY_FL 0x00100000 /* Verity protected inode */ +#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ +#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ +#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ +#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ +#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + + +#define SYNC_FILE_RANGE_WAIT_BEFORE 1 +#define SYNC_FILE_RANGE_WRITE 2 +#define SYNC_FILE_RANGE_WAIT_AFTER 4 +#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \ + SYNC_FILE_RANGE_WAIT_BEFORE | \ + SYNC_FILE_RANGE_WAIT_AFTER) + +/* + * Flags for preadv2/pwritev2: + */ + +typedef int __bitwise __kernel_rwf_t; + +/* high priority request, poll if possible */ +#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) + +/* per-IO O_DSYNC */ +#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002) + +/* per-IO O_SYNC */ +#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004) + +/* per-IO, return -EAGAIN if operation would block */ +#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) + +/* per-IO O_APPEND */ +#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) + +/* per-IO negation of O_APPEND */ +#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) + +/* mask of flags supported by the kernel */ +#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ + RWF_APPEND | RWF_NOAPPEND) + +#define PROCFS_IOCTL_MAGIC 'f' + +/* Pagemap ioctl */ +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) + +/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ +#define PAGE_IS_WPALLOWED (1 << 0) +#define PAGE_IS_WRITTEN (1 << 1) +#define PAGE_IS_FILE (1 << 2) +#define PAGE_IS_PRESENT (1 << 3) +#define PAGE_IS_SWAPPED (1 << 4) +#define PAGE_IS_PFNZERO (1 << 5) +#define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) + +/* + * struct page_region - Page region with flags + * @start: Start of the region + * @end: End of the region (exclusive) + * @categories: PAGE_IS_* category bitmask for the region + */ +struct page_region { + __u64 start; + __u64 end; + __u64 categories; +}; + +/* Flags for PAGEMAP_SCAN ioctl */ +#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */ +#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */ + +/* + * struct pm_scan_arg - Pagemap ioctl argument + * @size: Size of the structure + * @flags: Flags for the IOCTL + * @start: Starting address of the region + * @end: Ending address of the region + * @walk_end Address where the scan stopped (written by kernel). + * walk_end == end (address tags cleared) informs that the scan completed on entire range. + * @vec: Address of page_region struct array for output + * @vec_len: Length of the page_region struct array + * @max_pages: Optional limit for number of returned pages (0 = disabled) + * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1 + * @category_mask: Skip pages for which any category doesn't match + * @category_anyof_mask: Skip pages for which no category matches + * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned + */ +struct pm_scan_arg { + __u64 size; + __u64 flags; + __u64 start; + __u64 end; + __u64 walk_end; + __u64 vec; + __u64 vec_len; + __u64 max_pages; + __u64 category_inverted; + __u64 category_mask; + __u64 category_anyof_mask; + __u64 return_mask; +}; + +/* /proc/<pid>/maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc/<pid>/maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + +#endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index d03842abae57..e5af8c692dc0 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_PRE_FAULT_MEMORY 236 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1548,4 +1549,13 @@ struct kvm_create_guest_memfd { __u64 reserved[6]; }; +#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory) + +struct kvm_pre_fault_memory { + __u64 gpa; + __u64 size; + __u64 flags; + __u64 padding[5]; +}; + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index a246e11988d5..e89d00528f2f 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -17,6 +17,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h new file mode 100644 index 000000000000..35791791a879 --- /dev/null +++ b/tools/include/uapi/linux/prctl.h @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_PRCTL_H +#define _LINUX_PRCTL_H + +#include <linux/types.h> + +/* Values to pass as first argument to prctl() */ + +#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */ +#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */ + +/* Get/set current->mm->dumpable */ +#define PR_GET_DUMPABLE 3 +#define PR_SET_DUMPABLE 4 + +/* Get/set unaligned access control bits (if meaningful) */ +#define PR_GET_UNALIGN 5 +#define PR_SET_UNALIGN 6 +# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */ +# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */ + +/* Get/set whether or not to drop capabilities on setuid() away from + * uid 0 (as per security/commoncap.c) */ +#define PR_GET_KEEPCAPS 7 +#define PR_SET_KEEPCAPS 8 + +/* Get/set floating-point emulation control bits (if meaningful) */ +#define PR_GET_FPEMU 9 +#define PR_SET_FPEMU 10 +# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */ +# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */ + +/* Get/set floating-point exception mode (if meaningful) */ +#define PR_GET_FPEXC 11 +#define PR_SET_FPEXC 12 +# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */ +# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */ +# define PR_FP_EXC_OVF 0x020000 /* floating point overflow */ +# define PR_FP_EXC_UND 0x040000 /* floating point underflow */ +# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */ +# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */ +# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */ +# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */ +# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */ +# define PR_FP_EXC_PRECISE 3 /* precise exception mode */ + +/* Get/set whether we use statistical process timing or accurate timestamp + * based process timing */ +#define PR_GET_TIMING 13 +#define PR_SET_TIMING 14 +# define PR_TIMING_STATISTICAL 0 /* Normal, traditional, + statistical process timing */ +# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based + process timing */ + +#define PR_SET_NAME 15 /* Set process name */ +#define PR_GET_NAME 16 /* Get process name */ + +/* Get/set process endian */ +#define PR_GET_ENDIAN 19 +#define PR_SET_ENDIAN 20 +# define PR_ENDIAN_BIG 0 +# define PR_ENDIAN_LITTLE 1 /* True little endian mode */ +# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ + +/* Get/set process seccomp mode */ +#define PR_GET_SECCOMP 21 +#define PR_SET_SECCOMP 22 + +/* Get/set the capability bounding set (as per security/commoncap.c) */ +#define PR_CAPBSET_READ 23 +#define PR_CAPBSET_DROP 24 + +/* Get/set the process' ability to use the timestamp counter instruction */ +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ + +/* Get/set securebits (as per security/commoncap.c) */ +#define PR_GET_SECUREBITS 27 +#define PR_SET_SECUREBITS 28 + +/* + * Get/set the timerslack as used by poll/select/nanosleep + * A value of 0 means "use default" + */ +#define PR_SET_TIMERSLACK 29 +#define PR_GET_TIMERSLACK 30 + +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 + +/* + * Set early/late kill mode for hwpoison memory corruption. + * This influences when the process gets killed on a memory corruption. + */ +#define PR_MCE_KILL 33 +# define PR_MCE_KILL_CLEAR 0 +# define PR_MCE_KILL_SET 1 + +# define PR_MCE_KILL_LATE 0 +# define PR_MCE_KILL_EARLY 1 +# define PR_MCE_KILL_DEFAULT 2 + +#define PR_MCE_KILL_GET 34 + +/* + * Tune up process memory map specifics. + */ +#define PR_SET_MM 35 +# define PR_SET_MM_START_CODE 1 +# define PR_SET_MM_END_CODE 2 +# define PR_SET_MM_START_DATA 3 +# define PR_SET_MM_END_DATA 4 +# define PR_SET_MM_START_STACK 5 +# define PR_SET_MM_START_BRK 6 +# define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 +# define PR_SET_MM_MAP 14 +# define PR_SET_MM_MAP_SIZE 15 + +/* + * This structure provides new memory descriptor + * map which mostly modifies /proc/pid/stat[m] + * output for a task. This mostly done in a + * sake of checkpoint/restore functionality. + */ +struct prctl_mm_map { + __u64 start_code; /* code section bounds */ + __u64 end_code; + __u64 start_data; /* data section bounds */ + __u64 end_data; + __u64 start_brk; /* heap for brk() syscall */ + __u64 brk; + __u64 start_stack; /* stack starts at */ + __u64 arg_start; /* command line arguments bounds */ + __u64 arg_end; + __u64 env_start; /* environment variables bounds */ + __u64 env_end; + __u64 *auxv; /* auxiliary vector */ + __u32 auxv_size; /* vector size */ + __u32 exe_fd; /* /proc/$pid/exe link file */ +}; + +/* + * Set specific pid that is allowed to ptrace the current task. + * A value of 0 mean "no process". + */ +#define PR_SET_PTRACER 0x59616d61 +# define PR_SET_PTRACER_ANY ((unsigned long)-1) + +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + +/* + * If no_new_privs is set, then operations that grant new privileges (i.e. + * execve) will either fail or not grant them. This affects suid/sgid, + * file capabilities, and LSMs. + * + * Operations that merely manipulate or drop existing privileges (setresuid, + * capset, etc.) will still work. Drop those privileges if you want them gone. + * + * Changing LSM security domain is considered a new privilege. So, for example, + * asking selinux for a specific new context (e.g. with runcon) will result + * in execve returning -EPERM. + * + * See Documentation/userspace-api/no_new_privs.rst for more details. + */ +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 + +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + +/* + * No longer implemented, but left here to ensure the numbers stay reserved: + */ +#define PR_MPX_ENABLE_MANAGEMENT 43 +#define PR_MPX_DISABLE_MANAGEMENT 44 + +#define PR_SET_FP_MODE 45 +#define PR_GET_FP_MODE 46 +# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */ +# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */ + +/* Control the ambient capability set */ +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 + +/* arm64 Scalable Vector Extension controls */ +/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */ +#define PR_SVE_SET_VL 50 /* set task vector length */ +# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SVE_GET_VL 51 /* get task vector length */ +/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */ +# define PR_SVE_VL_LEN_MASK 0xffff +# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ + +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 +# define PR_SPEC_L1D_FLUSH 2 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) + +/* Reset arm64 pointer authentication keys */ +#define PR_PAC_RESET_KEYS 54 +# define PR_PAC_APIAKEY (1UL << 0) +# define PR_PAC_APIBKEY (1UL << 1) +# define PR_PAC_APDAKEY (1UL << 2) +# define PR_PAC_APDBKEY (1UL << 3) +# define PR_PAC_APGAKEY (1UL << 4) + +/* Tagged user address controls for arm64 */ +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_NONE 0UL +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 + +/* Control reclaim behavior when allocating memory */ +#define PR_SET_IO_FLUSHER 57 +#define PR_GET_IO_FLUSHER 58 + +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +/* The control values for the user space selector when dispatch is enabled */ +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 + +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 +# define PR_SCHED_CORE_SCOPE_THREAD 0 +# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 +# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 + +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + +/* Memory deny write / execute */ +#define PR_SET_MDWE 65 +# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) + +#define PR_GET_MDWE 66 + +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + +#define PR_GET_AUXV 0x41555856 + +#define PR_SET_MEMORY_MERGE 67 +#define PR_GET_MEMORY_MERGE 68 + +#define PR_RISCV_V_SET_CONTROL 69 +#define PR_RISCV_V_GET_CONTROL 70 +# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0 +# define PR_RISCV_V_VSTATE_CTRL_OFF 1 +# define PR_RISCV_V_VSTATE_CTRL_ON 2 +# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4) +# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3 +# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc +# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f + +#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71 +# define PR_RISCV_CTX_SW_FENCEI_ON 0 +# define PR_RISCV_CTX_SW_FENCEI_OFF 1 +# define PR_RISCV_SCOPE_PER_PROCESS 0 +# define PR_RISCV_SCOPE_PER_THREAD 1 + +/* PowerPC Dynamic Execution Control Register (DEXCR) controls */ +#define PR_PPC_GET_DEXCR 72 +#define PR_PPC_SET_DEXCR 73 +/* DEXCR aspect to act on */ +# define PR_PPC_DEXCR_SBHE 0 /* Speculative branch hint enable */ +# define PR_PPC_DEXCR_IBRTPD 1 /* Indirect branch recurrent target prediction disable */ +# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address prediction disable */ +# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction enable */ +/* Action to apply / return */ +# define PR_PPC_DEXCR_CTRL_EDITABLE 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */ +# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this process */ +# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this process */ +# define PR_PPC_DEXCR_CTRL_SET_ONEXEC 0x8 /* Set the aspect on exec */ +# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ +# define PR_PPC_DEXCR_CTRL_MASK 0x1f + +#endif /* _LINUX_PRCTL_H */ diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 10c067e3a8d2..69affa251fa7 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -52,7 +52,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; - u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ @@ -78,15 +77,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, /* Finish linking remainder of list b on to tail */ tail->next = b; do { - /* - * If the merge is highly unbalanced (e.g. the input is - * already sorted), this loop may run many iterations. - * Continue callbacks to the client even though no - * element comparison is needed, so the client's cmp() - * routine can invoke cond_resched() periodically. - */ - if (unlikely(!++count)) - cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 7bb03606b9ea..15791c1c5b28 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -3,7 +3,7 @@ # include ../scripts/Makefile.include -BUILD_TARGETS=page-types slabinfo page_owner_sort +BUILD_TARGETS=page-types slabinfo page_owner_sort thp_swap_allocator_test INSTALL_TARGETS = $(BUILD_TARGETS) thpmaps LIB_DIR = ../lib/api diff --git a/tools/mm/thp_swap_allocator_test.c b/tools/mm/thp_swap_allocator_test.c new file mode 100644 index 000000000000..83afc52275a5 --- /dev/null +++ b/tools/mm/thp_swap_allocator_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * thp_swap_allocator_test + * + * The purpose of this test program is helping check if THP swpout + * can correctly get swap slots to swap out as a whole instead of + * being split. It randomly releases swap entries through madvise + * DONTNEED and swapin/out on two memory areas: a memory area for + * 64KB THP and the other area for small folios. The second memory + * can be enabled by "-s". + * Before running the program, we need to setup a zRAM or similar + * swap device by: + * echo lzo > /sys/block/zram0/comp_algorithm + * echo 64M > /sys/block/zram0/disksize + * echo never > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled + * echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled + * mkswap /dev/zram0 + * swapon /dev/zram0 + * The expected result should be 0% anon swpout fallback ratio w/ or + * w/o "-s". + * + * Author(s): Barry Song <v-songbaohua@oppo.com> + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <errno.h> +#include <time.h> + +#define MEMSIZE_MTHP (60 * 1024 * 1024) +#define MEMSIZE_SMALLFOLIO (4 * 1024 * 1024) +#define ALIGNMENT_MTHP (64 * 1024) +#define ALIGNMENT_SMALLFOLIO (4 * 1024) +#define TOTAL_DONTNEED_MTHP (16 * 1024 * 1024) +#define TOTAL_DONTNEED_SMALLFOLIO (1 * 1024 * 1024) +#define MTHP_FOLIO_SIZE (64 * 1024) + +#define SWPOUT_PATH \ + "/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout" +#define SWPOUT_FALLBACK_PATH \ + "/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout_fallback" + +static void *aligned_alloc_mem(size_t size, size_t alignment) +{ + void *mem = NULL; + + if (posix_memalign(&mem, alignment, size) != 0) { + perror("posix_memalign"); + return NULL; + } + return mem; +} + +/* + * This emulates the behavior of native libc and Java heap, + * as well as process exit and munmap. It helps generate mTHP + * and ensures that iterations can proceed with mTHP, as we + * currently don't support large folios swap-in. + */ +static void random_madvise_dontneed(void *mem, size_t mem_size, + size_t align_size, size_t total_dontneed_size) +{ + size_t num_pages = total_dontneed_size / align_size; + size_t i; + size_t offset; + void *addr; + + for (i = 0; i < num_pages; ++i) { + offset = (rand() % (mem_size / align_size)) * align_size; + addr = (char *)mem + offset; + if (madvise(addr, align_size, MADV_DONTNEED) != 0) + perror("madvise dontneed"); + + memset(addr, 0x11, align_size); + } +} + +static void random_swapin(void *mem, size_t mem_size, + size_t align_size, size_t total_swapin_size) +{ + size_t num_pages = total_swapin_size / align_size; + size_t i; + size_t offset; + void *addr; + + for (i = 0; i < num_pages; ++i) { + offset = (rand() % (mem_size / align_size)) * align_size; + addr = (char *)mem + offset; + memset(addr, 0x11, align_size); + } +} + +static unsigned long read_stat(const char *path) +{ + FILE *file; + unsigned long value; + + file = fopen(path, "r"); + if (!file) { + perror("fopen"); + return 0; + } + + if (fscanf(file, "%lu", &value) != 1) { + perror("fscanf"); + fclose(file); + return 0; + } + + fclose(file); + return value; +} + +int main(int argc, char *argv[]) +{ + int use_small_folio = 0, aligned_swapin = 0; + void *mem1 = NULL, *mem2 = NULL; + int i; + + for (i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-s") == 0) + use_small_folio = 1; + else if (strcmp(argv[i], "-a") == 0) + aligned_swapin = 1; + } + + mem1 = aligned_alloc_mem(MEMSIZE_MTHP, ALIGNMENT_MTHP); + if (mem1 == NULL) { + fprintf(stderr, "Failed to allocate large folios memory\n"); + return EXIT_FAILURE; + } + + if (madvise(mem1, MEMSIZE_MTHP, MADV_HUGEPAGE) != 0) { + perror("madvise hugepage for mem1"); + free(mem1); + return EXIT_FAILURE; + } + + if (use_small_folio) { + mem2 = aligned_alloc_mem(MEMSIZE_SMALLFOLIO, ALIGNMENT_MTHP); + if (mem2 == NULL) { + fprintf(stderr, "Failed to allocate small folios memory\n"); + free(mem1); + return EXIT_FAILURE; + } + + if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_NOHUGEPAGE) != 0) { + perror("madvise nohugepage for mem2"); + free(mem1); + free(mem2); + return EXIT_FAILURE; + } + } + + /* warm-up phase to occupy the swapfile */ + memset(mem1, 0x11, MEMSIZE_MTHP); + madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT); + if (use_small_folio) { + memset(mem2, 0x11, MEMSIZE_SMALLFOLIO); + madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT); + } + + /* iterations with newly created mTHP, swap-in, and swap-out */ + for (i = 0; i < 100; ++i) { + unsigned long initial_swpout; + unsigned long initial_swpout_fallback; + unsigned long final_swpout; + unsigned long final_swpout_fallback; + unsigned long swpout_inc; + unsigned long swpout_fallback_inc; + double fallback_percentage; + + initial_swpout = read_stat(SWPOUT_PATH); + initial_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH); + + /* + * The following setup creates a 1:1 ratio of mTHP to small folios + * since large folio swap-in isn't supported yet. Once we support + * mTHP swap-in, we'll likely need to reduce MEMSIZE_MTHP and + * increase MEMSIZE_SMALLFOLIO to maintain the ratio. + */ + random_swapin(mem1, MEMSIZE_MTHP, + aligned_swapin ? ALIGNMENT_MTHP : ALIGNMENT_SMALLFOLIO, + TOTAL_DONTNEED_MTHP); + random_madvise_dontneed(mem1, MEMSIZE_MTHP, ALIGNMENT_MTHP, + TOTAL_DONTNEED_MTHP); + + if (use_small_folio) { + random_swapin(mem2, MEMSIZE_SMALLFOLIO, + ALIGNMENT_SMALLFOLIO, + TOTAL_DONTNEED_SMALLFOLIO); + } + + if (madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT) != 0) { + perror("madvise pageout for mem1"); + free(mem1); + if (mem2 != NULL) + free(mem2); + return EXIT_FAILURE; + } + + if (use_small_folio) { + if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT) != 0) { + perror("madvise pageout for mem2"); + free(mem1); + free(mem2); + return EXIT_FAILURE; + } + } + + final_swpout = read_stat(SWPOUT_PATH); + final_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH); + + swpout_inc = final_swpout - initial_swpout; + swpout_fallback_inc = final_swpout_fallback - initial_swpout_fallback; + + fallback_percentage = (double)swpout_fallback_inc / + (swpout_fallback_inc + swpout_inc) * 100; + + printf("Iteration %d: swpout inc: %lu, swpout fallback inc: %lu, Fallback percentage: %.2f%%\n", + i + 1, swpout_inc, swpout_fallback_inc, fallback_percentage); + } + + free(mem1); + if (mem2 != NULL) + free(mem2); + + return EXIT_SUCCESS; +} diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0a33d9195b7a..01237d167223 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1202,6 +1202,8 @@ static const char *uaccess_safe_builtin[] = { "__sanitizer_cov_trace_switch", /* KMSAN */ "kmsan_copy_to_user", + "kmsan_disable_current", + "kmsan_enable_current", "kmsan_report", "kmsan_unpoison_entry_regs", "kmsan_unpoison_memory", diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a4829b6532d8..c896babf7a74 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -182,20 +182,15 @@ endif FEATURE_CHECK_CFLAGS-libzstd := $(LIBZSTD_CFLAGS) FEATURE_CHECK_LDFLAGS-libzstd := $(LIBZSTD_LDFLAGS) +# for linking with debug library, run like: +# make DEBUG=1 PKG_CONFIG_PATH=/opt/libtraceevent/(lib|lib64)/pkgconfig + ifneq ($(NO_LIBTRACEEVENT),1) ifeq ($(call get-executable,$(PKG_CONFIG)),) $(error Error: $(PKG_CONFIG) needed by libtraceevent is missing on this system, please install it) endif endif -# for linking with debug library, run like: -# make DEBUG=1 PKG_CONFIG_PATH=/opt/libtraceevent/(lib|lib64)/pkgconfig -FEATURE_CHECK_CFLAGS-libtraceevent := $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null) -FEATURE_CHECK_LDFLAGS-libtraceevent := $(shell $(PKG_CONFIG) --libs libtraceevent 2>/dev/null) - -FEATURE_CHECK_CFLAGS-libtracefs := $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null) -FEATURE_CHECK_LDFLAGS-libtracefs := $(shell $(PKG_CONFIG) --libs libtracefs 2>/dev/null) - FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi # include ARCH specific config -include $(src-perf)/arch/$(SRCARCH)/Makefile @@ -1206,6 +1201,8 @@ ifneq ($(NO_LIBTRACEEVENT),1) LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION))) LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3)) CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP) + else + $(warning libtracefs is missing. Please install libtracefs-dev/libtracefs-devel) endif endif diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index 3992a67a87d9..c89d6bb6b184 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index 2386ebbf6dd4..b6b97de48233 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,5 +1,7 @@ +perf-util-y += header.o perf-util-y += perf_regs.o perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c new file mode 100644 index 000000000000..d962dff55512 --- /dev/null +++ b/tools/perf/arch/loongarch/util/header.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid(). + * + * Author: Nikita Shubin <n.shubin@yadro.com> + * Bibo Mao <maobibo@loongson.cn> + * Huacai Chen <chenhuacai@loongson.cn> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <api/fs/fs.h> +#include <errno.h> +#include "util/debug.h" +#include "util/header.h" + +/* + * Output example from /proc/cpuinfo + * CPU Family : Loongson-64bit + * Model Name : Loongson-3C5000 + * CPU Revision : 0x10 + * FPU Revision : 0x01 + */ +#define CPUINFO_MODEL "Model Name" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + unsigned long line_sz; + char *line, *model, *cpuid; + FILE *file; + + file = fopen(CPUINFO, "r"); + if (file == NULL) + return NULL; + + line = model = cpuid = NULL; + while (getline(&line, &line_sz, file) != -1) { + if (strncmp(line, CPUINFO_MODEL, strlen(CPUINFO_MODEL))) + continue; + + model = _get_field(line); + if (!model) + goto out_free; + break; + } + + if (model && (asprintf(&cpuid, "%s", model) < 0)) + cpuid = NULL; + +out_free: + fclose(file); + free(model); + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + int ret = 0; + char *cpuid = _get_cpuid(); + + if (!cpuid) + return EINVAL; + + if (sz < strlen(cpuid)) { + ret = ENOBUFS; + goto out_free; + } + + scnprintf(buffer, sz, "%s", cpuid); + +out_free: + free(cpuid); + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} diff --git a/tools/perf/arch/loongarch/util/kvm-stat.c b/tools/perf/arch/loongarch/util/kvm-stat.c new file mode 100644 index 000000000000..a7859a3a9a51 --- /dev/null +++ b/tools/perf/arch/loongarch/util/kvm-stat.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <memory.h> +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/pmus.h" + +#define LOONGARCH_EXCEPTION_INT 0 +#define LOONGARCH_EXCEPTION_PIL 1 +#define LOONGARCH_EXCEPTION_PIS 2 +#define LOONGARCH_EXCEPTION_PIF 3 +#define LOONGARCH_EXCEPTION_PME 4 +#define LOONGARCH_EXCEPTION_FPD 15 +#define LOONGARCH_EXCEPTION_SXD 16 +#define LOONGARCH_EXCEPTION_ASXD 17 +#define LOONGARCH_EXCEPTION_GSPR 22 +#define LOONGARCH_EXCEPTION_CPUCFG 100 +#define LOONGARCH_EXCEPTION_CSR 101 +#define LOONGARCH_EXCEPTION_IOCSR 102 +#define LOONGARCH_EXCEPTION_IDLE 103 +#define LOONGARCH_EXCEPTION_OTHERS 104 +#define LOONGARCH_EXCEPTION_HVC 23 + +#define loongarch_exception_type \ + {LOONGARCH_EXCEPTION_INT, "Interrupt" }, \ + {LOONGARCH_EXCEPTION_PIL, "Mem Read" }, \ + {LOONGARCH_EXCEPTION_PIS, "Mem Store" }, \ + {LOONGARCH_EXCEPTION_PIF, "Inst Fetch" }, \ + {LOONGARCH_EXCEPTION_PME, "Mem Modify" }, \ + {LOONGARCH_EXCEPTION_FPD, "FPU" }, \ + {LOONGARCH_EXCEPTION_SXD, "LSX" }, \ + {LOONGARCH_EXCEPTION_ASXD, "LASX" }, \ + {LOONGARCH_EXCEPTION_GSPR, "Privilege Error" }, \ + {LOONGARCH_EXCEPTION_HVC, "Hypercall" }, \ + {LOONGARCH_EXCEPTION_CPUCFG, "CPUCFG" }, \ + {LOONGARCH_EXCEPTION_CSR, "CSR" }, \ + {LOONGARCH_EXCEPTION_IOCSR, "IOCSR" }, \ + {LOONGARCH_EXCEPTION_IDLE, "Idle" }, \ + {LOONGARCH_EXCEPTION_OTHERS, "Others" } + +define_exit_reasons_table(loongarch_exit_reasons, loongarch_exception_type); + +const char *vcpu_id_str = "vcpu_id"; +const char *kvm_exit_reason = "reason"; +const char *kvm_entry_trace = "kvm:kvm_enter"; +const char *kvm_reenter_trace = "kvm:kvm_reenter"; +const char *kvm_exit_trace = "kvm:kvm_exit"; +const char *kvm_events_tp[] = { + "kvm:kvm_enter", + "kvm:kvm_reenter", + "kvm:kvm_exit", + "kvm:kvm_exit_gspr", + NULL, +}; + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + return exit_event_begin(evsel, sample, key); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + /* + * LoongArch kvm is different with other architectures + * + * There is kvm:kvm_reenter or kvm:kvm_enter event adjacent with + * kvm:kvm_exit event. + * kvm:kvm_enter means returning to vmm and then to guest + * kvm:kvm_reenter means returning to guest immediately + */ + return evsel__name_is(evsel, kvm_entry_trace) || evsel__name_is(evsel, kvm_reenter_trace); +} + +static void event_gspr_get_key(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + unsigned int insn; + + key->key = LOONGARCH_EXCEPTION_OTHERS; + insn = evsel__intval(evsel, sample, "inst_word"); + + switch (insn >> 24) { + case 0: + /* CPUCFG inst trap */ + if ((insn >> 10) == 0x1b) + key->key = LOONGARCH_EXCEPTION_CPUCFG; + break; + case 4: + /* CSR inst trap */ + key->key = LOONGARCH_EXCEPTION_CSR; + break; + case 6: + /* IOCSR inst trap */ + if ((insn >> 15) == 0xc90) + key->key = LOONGARCH_EXCEPTION_IOCSR; + else if ((insn >> 15) == 0xc91) + /* Idle inst trap */ + key->key = LOONGARCH_EXCEPTION_IDLE; + break; + default: + key->key = LOONGARCH_EXCEPTION_OTHERS; + break; + } +} + +static struct child_event_ops child_events[] = { + { .name = "kvm:kvm_exit_gspr", .get_key = event_gspr_get_key }, + { NULL, NULL }, +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .child_ops = child_events, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events, }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "loongarch64"; + kvm->exit_reasons = loongarch_exit_reasons; + return 0; +} diff --git a/tools/perf/arch/riscv/Makefile b/tools/perf/arch/riscv/Makefile index a8d25d005207..90c3c476a242 100644 --- a/tools/perf/arch/riscv/Makefile +++ b/tools/perf/arch/riscv/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 +HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 65ec3c66a375..f865cb0489ec 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,5 +1,6 @@ perf-util-y += perf_regs.o perf-util-y += header.o +perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/kvm-stat.c b/tools/perf/arch/riscv/util/kvm-stat.c new file mode 100644 index 000000000000..491aef449d1a --- /dev/null +++ b/tools/perf/arch/riscv/util/kvm-stat.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arch specific functions for perf kvm stat. + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#include <errno.h> +#include <memory.h> +#include "../../../util/evsel.h" +#include "../../../util/kvm-stat.h" +#include "riscv_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(riscv_exit_reasons, kvm_riscv_exception_class); + +const char *vcpu_id_str = "id"; +const char *kvm_exit_reason = "scause"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = riscv_exit_reasons; +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return evsel__name_is(evsel, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (evsel__name_is(evsel, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "riscv64"; + return 0; +} diff --git a/tools/perf/arch/riscv/util/riscv_exception_types.h b/tools/perf/arch/riscv/util/riscv_exception_types.h new file mode 100644 index 000000000000..c49b8fa5e847 --- /dev/null +++ b/tools/perf/arch/riscv/util/riscv_exception_types.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_RISCV_EXCEPTION_TYPES_H +#define ARCH_PERF_RISCV_EXCEPTION_TYPES_H + +#define EXC_INST_MISALIGNED 0 +#define EXC_INST_ACCESS 1 +#define EXC_INST_ILLEGAL 2 +#define EXC_BREAKPOINT 3 +#define EXC_LOAD_MISALIGNED 4 +#define EXC_LOAD_ACCESS 5 +#define EXC_STORE_MISALIGNED 6 +#define EXC_STORE_ACCESS 7 +#define EXC_SYSCALL 8 +#define EXC_HYPERVISOR_SYSCALL 9 +#define EXC_SUPERVISOR_SYSCALL 10 +#define EXC_INST_PAGE_FAULT 12 +#define EXC_LOAD_PAGE_FAULT 13 +#define EXC_STORE_PAGE_FAULT 15 +#define EXC_INST_GUEST_PAGE_FAULT 20 +#define EXC_LOAD_GUEST_PAGE_FAULT 21 +#define EXC_VIRTUAL_INST_FAULT 22 +#define EXC_STORE_GUEST_PAGE_FAULT 23 + +#define EXC(x) {EXC_##x, #x } + +#define kvm_riscv_exception_class \ + EXC(INST_MISALIGNED), EXC(INST_ACCESS), EXC(INST_ILLEGAL), \ + EXC(BREAKPOINT), EXC(LOAD_MISALIGNED), EXC(LOAD_ACCESS), \ + EXC(STORE_MISALIGNED), EXC(STORE_ACCESS), EXC(SYSCALL), \ + EXC(HYPERVISOR_SYSCALL), EXC(SUPERVISOR_SYSCALL), \ + EXC(INST_PAGE_FAULT), EXC(LOAD_PAGE_FAULT), EXC(STORE_PAGE_FAULT), \ + EXC(INST_GUEST_PAGE_FAULT), EXC(LOAD_GUEST_PAGE_FAULT), \ + EXC(VIRTUAL_INST_FAULT), EXC(STORE_GUEST_PAGE_FAULT) + +#endif /* ARCH_PERF_RISCV_EXCEPTION_TYPES_H */ diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index e30fd55f8e51..cd3b480d20bd 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -26,7 +26,6 @@ static bool is_ignored_symbol(const char *name, char type) * when --all-symbols is specified so exclude them to get a * stable symbol list. */ - "kallsyms_addresses", "kallsyms_offsets", "kallsyms_relative_base", "kallsyms_num_syms", diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 2340c4f6d0c2..67414944f245 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1501,7 +1501,7 @@ void dso__delete(struct dso *dso) auxtrace_cache__free(RC_CHK_ACCESS(dso)->auxtrace_cache); dso_cache__free(dso); dso__free_a2l(dso); - zfree(&RC_CHK_ACCESS(dso)->symsrc_filename); + dso__free_symsrc_filename(dso); nsinfo__zput(RC_CHK_ACCESS(dso)->nsinfo); mutex_destroy(dso__lock(dso)); RC_CHK_FREE(dso); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 878c1f441868..ed0068251c65 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -602,6 +602,11 @@ static inline void dso__set_symsrc_filename(struct dso *dso, char *val) RC_CHK_ACCESS(dso)->symsrc_filename = val; } +static inline void dso__free_symsrc_filename(struct dso *dso) +{ + zfree(&RC_CHK_ACCESS(dso)->symsrc_filename); +} + static inline enum dso_binary_type dso__symtab_type(const struct dso *dso) { return RC_CHK_ACCESS(dso)->symtab_type; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index f6a6f6a91030..16c2b03831f3 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -413,7 +413,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, __func__, dso__symsrc_filename(dso), debuglink); - zfree(&dso__symsrc_filename(dso)); + dso__free_symsrc_filename(dso); } dso__set_symsrc_filename(dso, debuglink); } else { diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index ea956082e6a4..e4313726fae3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -407,4 +407,5 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *g } EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm); +MODULE_DESCRIPTION("NVDIMM unit test"); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index b438f3d053ee..892e990c034a 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -987,5 +987,6 @@ static __exit void ndtest_exit(void) module_init(ndtest_init); module_exit(ndtest_exit); +MODULE_DESCRIPTION("Test non-NFIT devices"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a61df347a33d..cfd4378e2129 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3382,5 +3382,6 @@ static __exit void nfit_test_exit(void) module_init(nfit_test_init); module_exit(nfit_test_exit); +MODULE_DESCRIPTION("Test ACPI NFIT devices"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index ca24f6839d50..84b8c3c92c79 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -424,6 +424,7 @@ void idr_checks(void) #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) void ida_dump(struct ida *); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index f1caf4bcf937..cd1cf05503b4 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -19,6 +19,7 @@ #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c index f20e12cbbfd4..d0e53bff1eb6 100644 --- a/tools/testing/radix-tree/xarray.c +++ b/tools/testing/radix-tree/xarray.c @@ -10,6 +10,7 @@ #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 06eed383fdc0..bc8fe9e8f7f2 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -13,7 +13,8 @@ TARGETS += core TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += damon -TARGETS += devices +TARGETS += devices/error_logs +TARGETS += devices/probe TARGETS += dmabuf-heaps TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice @@ -252,6 +253,7 @@ ifdef INSTALL_PATH install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/ktap_helpers.sh $(INSTALL_PATH)/kselftest/ + install -m 744 kselftest/ksft.py $(INSTALL_PATH)/kselftest/ install -m 744 run_kselftest.sh $(INSTALL_PATH)/ rm -f $(TEST_LIST) @ret=1; \ diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config index 97d549ee894f..39f979690dd3 100644 --- a/tools/testing/selftests/cgroup/config +++ b/tools/testing/selftests/cgroup/config @@ -3,5 +3,4 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_MEMCG=y -CONFIG_MEMCG_KMEM=y CONFIG_PAGE_COUNTER=y diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 29a22f50e762..1e2e98cc809d 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,7 +4,7 @@ TEST_GEN_FILES += huge_count_read_write TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race TEST_GEN_FILES += debugfs_target_ids_pid_leak -TEST_GEN_FILES += access_memory +TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _chk_dependency.sh _debugfs_common.sh @@ -13,6 +13,7 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += sysfs.sh TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py +TEST_PROGS += damos_tried_regions.py damon_nr_regions.py TEST_PROGS += reclaim.sh lru_sort.sh # regression tests (reproducers of previously found bugs) diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 2bd44c32be1b..6e136dc3df19 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -175,16 +175,24 @@ class DamosStats: self.sz_applied = sz_applied self.qt_exceeds = qt_exceeds +class DamosTriedRegion: + def __init__(self, start, end, nr_accesses, age): + self.start = start + self.end = end + self.nr_accesses = nr_accesses + self.age = age + class Damos: action = None access_pattern = None quota = None apply_interval_us = None - # todo: Support watermarks, stats, tried_regions + # todo: Support watermarks, stats idx = None context = None tried_bytes = None stats = None + tried_regions = None def __init__(self, action='stat', access_pattern=DamosAccessPattern(), quota=DamosQuota(), apply_interval_us=0): @@ -398,6 +406,35 @@ class Kdamond: err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') return err + def stop(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'off') + return err + + def update_schemes_tried_regions(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_tried_regions') + if err is not None: + return err + for context in self.contexts: + for scheme in context.schemes: + tried_regions = [] + tried_regions_dir = os.path.join( + scheme.sysfs_dir(), 'tried_regions') + for filename in os.listdir( + os.path.join(scheme.sysfs_dir(), 'tried_regions')): + tried_region_dir = os.path.join(tried_regions_dir, filename) + if not os.path.isdir(tried_region_dir): + continue + region_values = [] + for f in ['start', 'end', 'nr_accesses', 'age']: + content, err = read_file( + os.path.join(tried_region_dir, f)) + if err is not None: + return err + region_values.append(int(content)) + tried_regions.append(DamosTriedRegion(*region_values)) + scheme.tried_regions = tried_regions + def update_schemes_tried_bytes(self): err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'update_schemes_tried_bytes') @@ -444,6 +481,25 @@ class Kdamond: goal.effective_bytes = int(content) return None + def commit(self): + nr_contexts_file = os.path.join(self.sysfs_dir(), + 'contexts', 'nr_contexts') + content, err = read_file(nr_contexts_file) + if err is not None: + return err + if int(content) != len(self.contexts): + err = write_file(nr_contexts_file, '%d' % len(self.contexts)) + if err is not None: + return err + + for context in self.contexts: + err = context.stage() + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'commit') + return err + + def commit_schemes_quota_goals(self): for context in self.contexts: for scheme in context.schemes: @@ -478,3 +534,10 @@ class Kdamonds: if err is not None: return err return None + + def stop(self): + for kdamond in self.kdamonds: + err = kdamond.stop() + if err is not None: + return err + return None diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c index 585a2fa54329..56b17e8fe1be 100644 --- a/tools/testing/selftests/damon/access_memory.c +++ b/tools/testing/selftests/damon/access_memory.c @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) start_clock = clock(); while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC < access_time_ms) - memset(regions[i], i, 1024 * 1024 * 10); + memset(regions[i], i, sz_region); } return 0; } diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c new file mode 100644 index 000000000000..3be121487432 --- /dev/null +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Artificial memory access program for testing DAMON. + * + * Receives number of regions and size of each region from user. Allocate the + * regions and repeatedly access even numbered (starting from zero) regions. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +int main(int argc, char *argv[]) +{ + char **regions; + clock_t start_clock; + int nr_regions; + int sz_region; + int access_time_ms; + int i; + + if (argc != 3) { + printf("Usage: %s <number> <size (bytes)>\n", argv[0]); + return -1; + } + + nr_regions = atoi(argv[1]); + sz_region = atoi(argv[2]); + + regions = malloc(sizeof(*regions) * nr_regions); + for (i = 0; i < nr_regions; i++) + regions[i] = malloc(sz_region); + + while (1) { + for (i = 0; i < nr_regions; i++) { + if (i % 2 == 0) + memset(regions[i], i, sz_region); + } + } + return 0; +} diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py new file mode 100644 index 000000000000..2e8a74aff543 --- /dev/null +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): + ''' + Create process of the given 'real_nr_regions' regions, monitor it using + DAMON with given '{min,max}_nr_regions' monitoring parameter. + + Exit with non-zero return code if the given {min,max}_nr_regions is not + kept. + ''' + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory_even', '%d' % real_nr_regions, + '%d' % sz_region]) + + # stat every monitored regions + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + min_nr_regions=min_nr_regions, + max_nr_regions=max_nr_regions), + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos(action='stat', + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err is not None: + proc.terminate() + print('kdamond start failed: %s' % err) + exit(1) + + collected_nr_regions = [] + while proc.poll() is None: + time.sleep(0.1) + err = kdamonds.kdamonds[0].update_schemes_tried_regions() + if err is not None: + proc.terminate() + print('tried regions update failed: %s' % err) + exit(1) + + scheme = kdamonds.kdamonds[0].contexts[0].schemes[0] + if scheme.tried_regions is None: + proc.terminate() + print('tried regions is not collected') + exit(1) + + nr_tried_regions = len(scheme.tried_regions) + if nr_tried_regions <= 0: + proc.terminate() + print('tried regions is not created') + exit(1) + collected_nr_regions.append(nr_tried_regions) + if len(collected_nr_regions) > 10: + break + proc.terminate() + kdamonds.stop() + + test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( + real_nr_regions, min_nr_regions, max_nr_regions) + if (collected_nr_regions[0] < min_nr_regions or + collected_nr_regions[-1] > max_nr_regions): + print('fail %s' % test_name) + print('number of regions that collected are:') + for nr in collected_nr_regions: + print(nr) + exit(1) + print('pass %s ' % test_name) + +def main(): + # test min_nr_regions larger than real nr regions + test_nr_regions(10, 20, 100) + + # test max_nr_regions smaller than real nr regions + test_nr_regions(15, 3, 10) + + # test online-tuned max_nr_regions that smaller than real nr regions + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region]) + + # stat every monitored regions + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + min_nr_regions=10, max_nr_regions=1000), + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos(action='stat', + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err is not None: + proc.terminate() + print('kdamond start failed: %s' % err) + exit(1) + + # wait until the real regions are found + time.sleep(3) + + attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs + attrs.min_nr_regions = 3 + attrs.max_nr_regions = 7 + err = kdamonds.kdamonds[0].commit() + if err is not None: + proc.terminate() + print('commit failed: %s' % err) + exit(1) + # wait for next merge operation is executed + time.sleep(0.3) + + err = kdamonds.kdamonds[0].update_schemes_tried_regions() + if err is not None: + proc.terminate() + print('tried regions update failed: %s' % err) + exit(1) + + scheme = kdamonds.kdamonds[0].contexts[0].schemes[0] + if scheme.tried_regions is None: + proc.terminate() + print('tried regions is not collected') + exit(1) + + nr_tried_regions = len(scheme.tried_regions) + if nr_tried_regions <= 0: + proc.terminate() + print('tried regions is not created') + exit(1) + proc.terminate() + + if nr_tried_regions > 7: + print('fail online-tuned max_nr_regions: %d > 7' % nr_tried_regions) + exit(1) + print('pass online-tuned max_nr_regions') + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/damos_tried_regions.py b/tools/testing/selftests/damon/damos_tried_regions.py new file mode 100644 index 000000000000..3b347eb28bd2 --- /dev/null +++ b/tools/testing/selftests/damon/damos_tried_regions.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + # repeatedly access even-numbered ones in 14 regions of 10 MiB size + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region]) + + # stat every monitored regions + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos(action='stat', + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err is not None: + proc.terminate() + print('kdamond start failed: %s' % err) + exit(1) + + collected_nr_regions = [] + while proc.poll() is None: + time.sleep(0.1) + err = kdamonds.kdamonds[0].update_schemes_tried_regions() + if err is not None: + proc.terminate() + print('tried regions update failed: %s' % err) + exit(1) + + scheme = kdamonds.kdamonds[0].contexts[0].schemes[0] + if scheme.tried_regions is None: + proc.terminate() + print('tried regions is not collected') + exit(1) + + nr_tried_regions = len(scheme.tried_regions) + if nr_tried_regions <= 0: + proc.terminate() + print('tried regions is not created') + exit(1) + collected_nr_regions.append(nr_tried_regions) + if len(collected_nr_regions) > 10: + break + proc.terminate() + + collected_nr_regions.sort() + sample = collected_nr_regions[4] + print('50-th percentile nr_regions: %d' % sample) + print('expectation (>= 14) is %s' % 'met' if sample >= 14 else 'not met') + if collected_nr_regions[4] < 14: + print('full nr_regions:') + print('\n'.join(collected_nr_regions)) + exit(1) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/devices/Makefile b/tools/testing/selftests/devices/Makefile deleted file mode 100644 index ca29249b30c3..000000000000 --- a/tools/testing/selftests/devices/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -TEST_PROGS := test_discoverable_devices.py -TEST_FILES := boards ksft.py - -include ../lib.mk diff --git a/tools/testing/selftests/devices/error_logs/Makefile b/tools/testing/selftests/devices/error_logs/Makefile new file mode 100644 index 000000000000..d546c3fb0a7f --- /dev/null +++ b/tools/testing/selftests/devices/error_logs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := test_device_error_logs.py + +include ../../lib.mk diff --git a/tools/testing/selftests/devices/error_logs/test_device_error_logs.py b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py new file mode 100755 index 000000000000..3dd56c8ec92c --- /dev/null +++ b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2024 Collabora Ltd +# +# This test checks for the presence of error (or more critical) log messages +# coming from devices in the kernel log. +# +# One failed test case is reported for each device that has outputted error +# logs. Devices with no errors do not produce a passing test case to avoid +# polluting the results, therefore a successful run will list 0 tests run. +# + +import glob +import os +import re +import sys + +# Allow ksft module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../../kselftest/")) + +import ksft + +kmsg = "/dev/kmsg" + +RE_log = re.compile( + r"(?P<prefix>[0-9]+),(?P<sequence>[0-9]+),(?P<timestamp>[0-9]+),(?P<flag>[^;]*)(,[^;]*)*;(?P<message>.*)" +) +RE_tag = re.compile(r" (?P<key>[^=]+)=(?P<value>.*)") + +PREFIX_ERROR = 3 + +logs = [] +error_log_per_device = {} + + +def parse_kmsg(): + current_log = {} + + with open(kmsg) as f: + os.set_blocking(f.fileno(), False) + + for line in f: + tag_line = RE_tag.match(line) + log_line = RE_log.match(line) + + if log_line: + if current_log: + logs.append(current_log) # Save last log + + current_log = { + "prefix": int(log_line.group("prefix")), + "sequence": int(log_line.group("sequence")), + "timestamp": int(log_line.group("timestamp")), + "flag": log_line.group("flag"), + "message": log_line.group("message"), + } + elif tag_line: + current_log[tag_line.group("key")] = tag_line.group("value") + + +def generate_per_device_error_log(): + for log in logs: + if log.get("DEVICE") and log["prefix"] <= PREFIX_ERROR: + if not error_log_per_device.get(log["DEVICE"]): + error_log_per_device[log["DEVICE"]] = [] + error_log_per_device[log["DEVICE"]].append(log) + + +parse_kmsg() + +generate_per_device_error_log() +num_tests = len(error_log_per_device) + +ksft.print_header() +ksft.set_plan(num_tests) + +for device in error_log_per_device: + for log in error_log_per_device[device]: + ksft.print_msg(log["message"]) + ksft.test_result_fail(device) +if num_tests == 0: + ksft.print_msg("No device error logs found") +ksft.finished() diff --git a/tools/testing/selftests/devices/probe/Makefile b/tools/testing/selftests/devices/probe/Makefile new file mode 100644 index 000000000000..f630108c3fdf --- /dev/null +++ b/tools/testing/selftests/devices/probe/Makefile @@ -0,0 +1,4 @@ +TEST_PROGS := test_discoverable_devices.py +TEST_FILES := boards + +include ../../lib.mk diff --git a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml index ff932eb19f0b..ff932eb19f0b 100644 --- a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml +++ b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml diff --git a/tools/testing/selftests/devices/boards/google,spherion.yaml b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml index 17157ecd8c14..3ea843324797 100644 --- a/tools/testing/selftests/devices/boards/google,spherion.yaml +++ b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml @@ -11,6 +11,10 @@ # this, several optional keys can be used: # - dt-mmio: identify the MMIO address of the controller as defined in the # Devicetree. +# - of-fullname-regex: regular expression to match against the OF_FULLNAME +# property. Useful when the controller's address is not unique across other +# sibling controllers. In this case, dt-mmio can't be used, and this property +# allows the matching to include parent nodes as well to make it unique. # - usb-version: for USB controllers to differentiate between USB3 and USB2 # buses sharing the same controller. # - acpi-uid: _UID property of the controller as supplied by the ACPI. Useful to diff --git a/tools/testing/selftests/devices/test_discoverable_devices.py b/tools/testing/selftests/devices/probe/test_discoverable_devices.py index fbae8deb593d..d94a74b8a054 100755 --- a/tools/testing/selftests/devices/test_discoverable_devices.py +++ b/tools/testing/selftests/devices/probe/test_discoverable_devices.py @@ -14,13 +14,19 @@ # the description and examples of the file structure and vocabulary. # +import argparse import glob -import ksft import os import re import sys import yaml +# Allow ksft module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../../kselftest/")) + +import ksft + pci_controllers = [] usb_controllers = [] @@ -63,6 +69,22 @@ def get_dt_mmio(sysfs_dev_dir): sysfs_dev_dir = os.path.dirname(sysfs_dev_dir) +def get_of_fullname(sysfs_dev_dir): + re_of_fullname = re.compile("OF_FULLNAME=(.*)") + of_full_name = None + + # PCI controllers' sysfs don't have an of_node, so have to read it from the + # parent + while not of_full_name: + try: + with open(os.path.join(sysfs_dev_dir, "uevent")) as f: + of_fullname = re_of_fullname.search(f.read()).group(1) + return of_fullname + except: + pass + sysfs_dev_dir = os.path.dirname(sysfs_dev_dir) + + def get_acpi_uid(sysfs_dev_dir): with open(os.path.join(sysfs_dev_dir, "firmware_node", "uid")) as f: return f.read() @@ -96,6 +118,11 @@ def find_controller_in_sysfs(controller, parent_sysfs=None): if str(controller["dt-mmio"]) != get_dt_mmio(c): continue + if controller.get("of-fullname-regex"): + re_of_fullname = re.compile(str(controller["of-fullname-regex"])) + if not re_of_fullname.match(get_of_fullname(c)): + continue + if controller.get("usb-version"): if controller["usb-version"] != get_usb_version(c): continue @@ -194,6 +221,9 @@ def generate_pathname(device): if device.get("dt-mmio"): pathname += "@" + str(device["dt-mmio"]) + if device.get("of-fullname-regex"): + pathname += "-" + str(device["of-fullname-regex"]) + if device.get("name"): pathname = pathname + "/" + device["name"] @@ -296,14 +326,24 @@ def run_test(yaml_file): parse_device_tree_node(device_tree) +parser = argparse.ArgumentParser() +parser.add_argument( + "--boards-dir", default="boards", help="Directory containing the board YAML files" +) +args = parser.parse_args() + find_pci_controller_dirs() find_usb_controller_dirs() ksft.print_header() +if not os.path.exists(args.boards_dir): + ksft.print_msg(f"Boards directory '{args.boards_dir}' doesn't exist") + ksft.exit_fail() + board_file = "" for board_filename in get_board_filenames(): - full_board_filename = os.path.join("boards", board_filename + ".yaml") + full_board_filename = os.path.join(args.boards_dir, board_filename + ".yaml") if os.path.exists(full_board_filename): board_file = full_board_filename diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index c812080e304e..6062723a172e 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -9,52 +9,162 @@ #include <errno.h> #include <fcntl.h> #include <malloc.h> +#include <stdbool.h> #include <sys/ioctl.h> #include <sys/syscall.h> +#include <sys/mman.h> #include <linux/memfd.h> #include <linux/udmabuf.h> +#include "../../kselftest.h" #define TEST_PREFIX "drivers/dma-buf/udmabuf" #define NUM_PAGES 4 +#define NUM_ENTRIES 4 +#define MEMFD_SIZE 1024 /* in pages */ -static int memfd_create(const char *name, unsigned int flags) +static unsigned int page_size; + +static int create_memfd_with_seals(off64_t size, bool hpage) +{ + int memfd, ret; + unsigned int flags = MFD_ALLOW_SEALING; + + if (hpage) + flags |= MFD_HUGETLB; + + memfd = memfd_create("udmabuf-test", flags); + if (memfd < 0) { + ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = ftruncate(memfd, size); + if (ret == -1) { + ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return memfd; +} + +static int create_udmabuf_list(int devfd, int memfd, off64_t memfd_size) +{ + struct udmabuf_create_list *list; + int ubuf_fd, i; + + list = malloc(sizeof(struct udmabuf_create_list) + + sizeof(struct udmabuf_create_item) * NUM_ENTRIES); + if (!list) { + ksft_print_msg("%s: [FAIL, udmabuf-malloc]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + for (i = 0; i < NUM_ENTRIES; i++) { + list->list[i].memfd = memfd; + list->list[i].offset = i * (memfd_size / NUM_ENTRIES); + list->list[i].size = getpagesize() * NUM_PAGES; + } + + list->count = NUM_ENTRIES; + list->flags = UDMABUF_FLAGS_CLOEXEC; + ubuf_fd = ioctl(devfd, UDMABUF_CREATE_LIST, list); + free(list); + if (ubuf_fd < 0) { + ksft_print_msg("%s: [FAIL, udmabuf-create]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return ubuf_fd; +} + +static void write_to_memfd(void *addr, off64_t size, char chr) +{ + int i; + + for (i = 0; i < size / page_size; i++) { + *((char *)addr + (i * page_size)) = chr; + } +} + +static void *mmap_fd(int fd, off64_t size) +{ + void *addr; + + addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + ksft_print_msg("%s: ubuf_fd mmap fail\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return addr; +} + +static int compare_chunks(void *addr1, void *addr2, off64_t memfd_size) { - return syscall(__NR_memfd_create, name, flags); + off64_t off; + int i = 0, j, k = 0, ret = 0; + char char1, char2; + + while (i < NUM_ENTRIES) { + off = i * (memfd_size / NUM_ENTRIES); + for (j = 0; j < NUM_PAGES; j++, k++) { + char1 = *((char *)addr1 + off + (j * getpagesize())); + char2 = *((char *)addr2 + (k * getpagesize())); + if (char1 != char2) { + ret = -1; + goto err; + } + } + i++; + } +err: + munmap(addr1, memfd_size); + munmap(addr2, NUM_ENTRIES * NUM_PAGES * getpagesize()); + return ret; } int main(int argc, char *argv[]) { struct udmabuf_create create; int devfd, memfd, buf, ret; - off_t size; - void *mem; + off64_t size; + void *addr1, *addr2; + + ksft_print_header(); + ksft_set_plan(6); devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { - printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); - exit(77); + ksft_print_msg( + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + exit(KSFT_SKIP); } memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); if (memfd < 0) { - printf("%s: [skip,no-memfd]\n", TEST_PREFIX); - exit(77); + ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(KSFT_SKIP); } ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); if (ret < 0) { - printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); - exit(77); + ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(KSFT_SKIP); } - size = getpagesize() * NUM_PAGES; ret = ftruncate(memfd, size); if (ret == -1) { - printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); - exit(1); + ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(KSFT_FAIL); } memset(&create, 0, sizeof(create)); @@ -64,44 +174,86 @@ int main(int argc, char *argv[]) create.offset = getpagesize()/2; create.size = getpagesize(); buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf >= 0) { - printf("%s: [FAIL,test-1]\n", TEST_PREFIX); - exit(1); - } + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-1]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-1]\n", TEST_PREFIX); /* should fail (size not multiple of page) */ create.memfd = memfd; create.offset = 0; create.size = getpagesize()/2; buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf >= 0) { - printf("%s: [FAIL,test-2]\n", TEST_PREFIX); - exit(1); - } + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-2]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-2]\n", TEST_PREFIX); /* should fail (not memfd) */ create.memfd = 0; /* stdin */ create.offset = 0; create.size = size; buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf >= 0) { - printf("%s: [FAIL,test-3]\n", TEST_PREFIX); - exit(1); - } + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-3]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-3]\n", TEST_PREFIX); /* should work */ + page_size = getpagesize(); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); create.memfd = memfd; create.offset = 0; create.size = size; buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf < 0) { - printf("%s: [FAIL,test-4]\n", TEST_PREFIX); - exit(1); - } + if (buf < 0) + ksft_test_result_fail("%s: [FAIL,test-4]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-4]\n", TEST_PREFIX); + + munmap(addr1, size); + close(buf); + close(memfd); + + /* should work (migration of 4k size pages)*/ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, false); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-5]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-5]\n", TEST_PREFIX); + + close(buf); + close(memfd); + + /* should work (migration of 2MB size huge pages)*/ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-6]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX); - fprintf(stderr, "%s: ok\n", TEST_PREFIX); close(buf); close(memfd); close(devfd); + + ksft_print_msg("%s: ok\n", TEST_PREFIX); + ksft_print_cnts(); + return 0; } diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index ab67d58cfab7..ba012bc5aab9 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS = -Wall CFLAGS += -Wno-nonnull -CFLAGS += -D_GNU_SOURCE ALIGNS := 0x1000 0x200000 0x1000000 ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c index f142a137526c..85acb4e3ef00 100644 --- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c +++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c @@ -13,6 +13,8 @@ #include <sys/eventfd.h> #include "../../kselftest_harness.h" +#define EVENTFD_TEST_ITERATIONS 100000UL + struct error { int code; char msg[512]; @@ -40,7 +42,7 @@ static inline int sys_eventfd2(unsigned int count, int flags) return syscall(__NR_eventfd2, count, flags); } -TEST(eventfd01) +TEST(eventfd_check_flag_rdwr) { int fd, flags; @@ -54,7 +56,7 @@ TEST(eventfd01) close(fd); } -TEST(eventfd02) +TEST(eventfd_check_flag_cloexec) { int fd, flags; @@ -68,7 +70,7 @@ TEST(eventfd02) close(fd); } -TEST(eventfd03) +TEST(eventfd_check_flag_nonblock) { int fd, flags; @@ -83,7 +85,7 @@ TEST(eventfd03) close(fd); } -TEST(eventfd04) +TEST(eventfd_chek_flag_cloexec_and_nonblock) { int fd, flags; @@ -161,7 +163,7 @@ static int verify_fdinfo(int fd, struct error *err, const char *prefix, return 0; } -TEST(eventfd05) +TEST(eventfd_check_flag_semaphore) { struct error err = {0}; int fd, ret; @@ -183,4 +185,128 @@ TEST(eventfd05) close(fd); } +/* + * A write(2) fails with the error EINVAL if the size of the supplied buffer + * is less than 8 bytes, or if an attempt is made to write the value + * 0xffffffffffffffff. + */ +TEST(eventfd_check_write) +{ + uint64_t value = 1; + ssize_t size; + int fd; + + fd = sys_eventfd2(0, 0); + ASSERT_GE(fd, 0); + + size = write(fd, &value, sizeof(int)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + + value = (uint64_t)-1; + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + close(fd); +} + +/* + * A read(2) fails with the error EINVAL if the size of the supplied buffer is + * less than 8 bytes. + */ +TEST(eventfd_check_read) +{ + uint64_t value; + ssize_t size; + int fd; + + fd = sys_eventfd2(1, 0); + ASSERT_GE(fd, 0); + + size = read(fd, &value, sizeof(int)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + EXPECT_EQ(value, 1); + + close(fd); +} + + +/* + * If EFD_SEMAPHORE was not specified and the eventfd counter has a nonzero + * value, then a read(2) returns 8 bytes containing that value, and the + * counter's value is reset to zero. + * If the eventfd counter is zero at the time of the call to read(2), then the + * call fails with the error EAGAIN if the file descriptor has been made nonblocking. + */ +TEST(eventfd_check_read_with_nonsemaphore) +{ + uint64_t value; + ssize_t size; + int fd; + int i; + + fd = sys_eventfd2(0, EFD_NONBLOCK); + ASSERT_GE(fd, 0); + + value = 1; + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + } + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(uint64_t)); + EXPECT_EQ(value, EVENTFD_TEST_ITERATIONS); + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fd); +} + +/* + * If EFD_SEMAPHORE was specified and the eventfd counter has a nonzero value, + * then a read(2) returns 8 bytes containing the value 1, and the counter's + * value is decremented by 1. + * If the eventfd counter is zero at the time of the call to read(2), then the + * call fails with the error EAGAIN if the file descriptor has been made nonblocking. + */ +TEST(eventfd_check_read_with_semaphore) +{ + uint64_t value; + ssize_t size; + int fd; + int i; + + fd = sys_eventfd2(0, EFD_SEMAPHORE|EFD_NONBLOCK); + ASSERT_GE(fd, 0); + + value = 1; + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + } + + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + EXPECT_EQ(value, 1); + } + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 994fa3468f17..f79f9bac7918 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) -CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE= -pthread $(INCLUDES) $(KHDR_INCLUDES) +CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) LDLIBS := -lpthread -lrt LOCAL_HDRS := \ diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 05d66ef50c97..f45372cb00fe 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE +CFLAGS := $(CFLAGS) -Wall LDLIBS += -lm ARCH ?= $(shell uname -m 2>/dev/null || echo not) diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile index 32c5fdfd0eef..fd6477911f24 100644 --- a/tools/testing/selftests/iommu/Makefile +++ b/tools/testing/selftests/iommu/Makefile @@ -2,8 +2,6 @@ CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += $(KHDR_INCLUDES) -CFLAGS += -D_GNU_SOURCE - TEST_GEN_PROGS := TEST_GEN_PROGS += iommufd TEST_GEN_PROGS += iommufd_fail_nth diff --git a/tools/testing/selftests/devices/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e..cd89fb2bc10e 100644 --- a/tools/testing/selftests/devices/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ac280dcba996..48d32c5aa3eb 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -112,6 +112,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test @@ -145,6 +146,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test TEST_GEN_PROGS_x86_64 += system_counter_offset_test +TEST_GEN_PROGS_x86_64 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test @@ -231,7 +233,7 @@ LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -D_GNU_SOURCE -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index a7de39fa2a0a..d20981663831 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -219,6 +219,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_DONE(); } @@ -490,11 +491,25 @@ static void test_clidr(struct kvm_vcpu *vcpu) test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; } +static void test_ctr(struct kvm_vcpu *vcpu) +{ + u64 ctr; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr); + ctr &= ~CTR_EL0_DIC_MASK; + if (ctr & CTR_EL0_IminLine_MASK) + ctr--; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr); + test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; +} + static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) { u64 val; test_clidr(vcpu); + test_ctr(vcpu); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); val++; @@ -524,7 +539,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) for (int i = 0; i < ARRAY_SIZE(test_regs); i++) test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); ksft_test_result_pass("%s\n", __func__); } diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index bed316fdecd5..0f268b55fa06 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -60,6 +60,14 @@ #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 #define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 void apic_disable(void); void xapic_enable(void); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index c0c7c1fe93f9..a0c1440017bb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -23,6 +23,7 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; +extern uint64_t guest_tsc_khz; /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -816,6 +817,23 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } +static inline void udelay(unsigned long usec) +{ + uint64_t start, now, cycles; + + GUEST_ASSERT(guest_tsc_khz); + cycles = guest_tsc_khz / 1000 * usec; + + /* + * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is + * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits. + */ + start = rdtsc(); + do { + now = rdtsc(); + } while (now - start < cycles); +} + #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index ad00e4761886..56b170b725b3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -21,6 +21,7 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; +static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); @@ -434,7 +435,10 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - pr_info("Random seed: 0x%x\n", guest_random_seed); + if (guest_random_seed != last_guest_seed) { + pr_info("Random seed: 0x%x\n", guest_random_seed); + last_guest_seed = guest_random_seed; + } guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); @@ -2319,7 +2323,8 @@ void __attribute((constructor)) kvm_selftest_init(void) /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); - guest_random_seed = random(); + guest_random_seed = last_guest_seed = random(); + pr_info("Random seed: 0x%x\n", guest_random_seed); kvm_selftest_arch_init(); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 594b061aef52..153739f2e201 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -25,6 +25,7 @@ vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; bool is_forced_emulation_enabled; +uint64_t guest_tsc_khz; static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -616,6 +617,11 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) void kvm_arch_vm_post_create(struct kvm_vm *vm) { + int r; + + TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ), + "Require KVM_GET_TSC_KHZ to provide udelay() to guest."); + vm_create_irqchip(vm); vm_init_descriptor_tables(vm); @@ -628,6 +634,11 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } + + r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency."); + guest_tsc_khz = r; + sync_global_to_guest(vm, guest_tsc_khz); } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 05fcf902e067..49f162573126 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -53,12 +53,6 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } } -struct memslot_antagonist_args { - struct kvm_vm *vm; - useconds_t delay; - uint64_t nr_modifications; -}; - static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, uint64_t nr_modifications) { diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c new file mode 100644 index 000000000000..0350a8896a2f --- /dev/null +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Intel, Inc + * + * Author: + * Isaku Yamahata <isaku.yamahata at gmail.com> + */ +#include <linux/sizes.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +/* Arbitrarily chosen values */ +#define TEST_SIZE (SZ_2M + PAGE_SIZE) +#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) +#define TEST_SLOT 10 + +static void guest_code(uint64_t base_gpa) +{ + volatile uint64_t val __used; + int i; + + for (i = 0; i < TEST_NPAGES; i++) { + uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE); + + val = *src; + } + + GUEST_DONE(); +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, + u64 left) +{ + struct kvm_pre_fault_memory range = { + .gpa = gpa, + .size = size, + .flags = 0, + }; + u64 prev; + int ret, save_errno; + + do { + prev = range.size; + ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); + save_errno = errno; + TEST_ASSERT((range.size < prev) ^ (ret < 0), + "%sexpecting range.size to change on %s", + ret < 0 ? "not " : "", + ret < 0 ? "failure" : "success"); + } while (ret >= 0 ? range.size : save_errno == EINTR); + + TEST_ASSERT(range.size == left, + "Completed with %lld bytes left, expected %" PRId64, + range.size, left); + + if (left == 0) + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + else + /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */ + __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); +} + +static void __test_pre_fault_memory(unsigned long vm_type, bool private) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = vm_type, + }; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + uint64_t guest_test_phys_mem; + uint64_t guest_test_virt_mem; + uint64_t alignment, guest_page_size; + + vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); + + alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size; +#ifdef __s390x__ + alignment = max(0x100000UL, guest_page_size); +#else + alignment = SZ_2M; +#endif + guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); + guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, TEST_SLOT, TEST_NPAGES, + private ? KVM_MEM_GUEST_MEMFD : 0); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES); + + if (private) + vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); + pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0); + pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE); + pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE); + + vcpu_args_set(vcpu, 1, guest_test_virt_mem); + vcpu_run(vcpu); + + run = vcpu->run; + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + break; + } + + kvm_vm_free(vm); +} + +static void test_pre_fault_memory(unsigned long vm_type, bool private) +{ + if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) { + pr_info("Skipping tests for vm_type 0x%lx\n", vm_type); + return; + } + + __test_pre_fault_memory(vm_type, private); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY)); + + test_pre_fault_memory(0, false); +#ifdef __x86_64__ + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false); + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true); +#endif + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 222198dd6d04..f92c2fb23fcd 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -49,6 +49,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: @@ -56,6 +57,11 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: @@ -68,6 +74,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH: @@ -415,6 +422,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZBC), @@ -422,6 +430,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZCA), + KVM_ISA_EXT_ARR(ZCB), + KVM_ISA_EXT_ARR(ZCD), + KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), @@ -434,6 +447,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZIMOP), KVM_ISA_EXT_ARR(ZKND), KVM_ISA_EXT_ARR(ZKNE), KVM_ISA_EXT_ARR(ZKNH), @@ -939,6 +953,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); @@ -946,6 +961,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); @@ -958,6 +978,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH); @@ -995,6 +1016,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svnapot, &config_svpbmt, &config_zacas, + &config_zawrs, &config_zba, &config_zbb, &config_zbc, @@ -1002,6 +1024,11 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zbkc, &config_zbkx, &config_zbs, + &config_zca, + &config_zcb, + &config_zcd, + &config_zcf, + &config_zcmop, &config_zfa, &config_zfh, &config_zfhmin, @@ -1014,6 +1041,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zihintntl, &config_zihintpause, &config_zihpm, + &config_zimop, &config_zknd, &config_zkne, &config_zknh, diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c new file mode 100644 index 000000000000..f8916bb34405 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Intel Corporation + * + * Verify KVM correctly emulates the APIC bus frequency when the VMM configures + * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by + * programming TMICT (timer initial count) to the largest value possible (so + * that the timer will not expire during the test). Then, after an arbitrary + * amount of time has elapsed, verify TMCCT (timer current count) is within 1% + * of the expected value based on the time elapsed, the APIC bus frequency, and + * the programmed TDCR (timer divide configuration register). + */ + +#include "apic.h" +#include "test_util.h" + +/* + * Possible TDCR values with matching divide count. Used to modify APIC + * timer frequency. + */ +static const struct { + const uint32_t tdcr; + const uint32_t divide_count; +} tdcrs[] = { + {0x0, 2}, + {0x1, 4}, + {0x2, 8}, + {0x3, 16}, + {0x8, 32}, + {0x9, 64}, + {0xa, 128}, + {0xb, 1}, +}; + +static bool is_x2apic; + +static void apic_enable(void) +{ + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); +} + +static uint32_t apic_read_reg(unsigned int reg) +{ + return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); +} + +static void apic_write_reg(unsigned int reg, uint32_t val) +{ + if (is_x2apic) + x2apic_write_reg(reg, val); + else + xapic_write_reg(reg, val); +} + +static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +{ + uint64_t tsc_hz = guest_tsc_khz * 1000; + const uint32_t tmict = ~0u; + uint64_t tsc0, tsc1, freq; + uint32_t tmcct; + int i; + + apic_enable(); + + /* + * Setup one-shot timer. The vector does not matter because the + * interrupt should not fire. + */ + apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED); + + for (i = 0; i < ARRAY_SIZE(tdcrs); i++) { + apic_write_reg(APIC_TDCR, tdcrs[i].tdcr); + apic_write_reg(APIC_TMICT, tmict); + + tsc0 = rdtsc(); + udelay(delay_ms * 1000); + tmcct = apic_read_reg(APIC_TMCCT); + tsc1 = rdtsc(); + + /* + * Stop the timer _after_ reading the current, final count, as + * writing the initial counter also modifies the current count. + */ + apic_write_reg(APIC_TMICT, 0); + + freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0); + /* Check if measured frequency is within 5% of configured frequency. */ + __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100, + "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu", + freq, apic_hz * 95 / 100, apic_hz * 105 / 100, + apic_hz, tdcrs[i].divide_count, tsc_hz); + } + + GUEST_DONE(); +} + +static void test_apic_bus_clock(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } + } +} + +static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, + bool x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int ret; + + is_x2apic = x2apic; + + vm = vm_create(1); + + sync_global_to_guest(vm, is_x2apic); + + vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); + vcpu_args_set(vcpu, 2, apic_hz, delay_ms); + + ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + TEST_ASSERT(ret < 0 && errno == EINVAL, + "Setting of APIC bus frequency after vCPU is created should fail."); + + if (!is_x2apic) + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + test_apic_bus_clock(vcpu); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name); + puts(""); + printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n"); + printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + /* + * Arbitrarilty default to 25MHz for the APIC bus frequency, which is + * different enough from the default 1GHz to be interesting. + */ + uint64_t apic_hz = 25 * 1000 * 1000; + uint64_t delay_ms = 100; + int opt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); + + while ((opt = getopt(argc, argv, "d:f:h")) != -1) { + switch (opt) { + case 'f': + apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000; + break; + case 'd': + delay_ms = atoi_positive("Delay in milliseconds", optarg); + break; + case 'h': + default: + help(argv[0]); + exit(KSFT_SKIP); + } + } + + run_apic_bus_clock_test(apic_hz, delay_ms, false); + run_apic_bus_clock_test(apic_hz, delay_ms, true); +} diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c index 3cc4b86832fe..7e2bfb3c3f3b 100644 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c @@ -26,19 +26,37 @@ int main(int argc, char *argv[]) TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); + /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */ + if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID); + + /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail"); + } + /* Set KVM_CAP_MAX_VCPU_ID */ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); - /* Try to set KVM_CAP_MAX_VCPU_ID again */ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); - /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/ + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); + /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32)); + TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail"); + + /* Create vCPU with id within bounds */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0); + TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed"); + + close(ret); kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 96446134c00b..698cb36989db 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -7,15 +7,28 @@ #include "pmu.h" #include "processor.h" -/* Number of LOOP instructions for the guest measurement payload. */ -#define NUM_BRANCHES 10 +/* Number of iterations of the loop for the guest measurement payload. */ +#define NUM_LOOPS 10 + +/* Each iteration of the loop retires one branch instruction. */ +#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) + +/* + * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, + * 1 LOOP. + */ +#define NUM_INSNS_PER_LOOP 3 + /* * Number of "extra" instructions that will be counted, i.e. the number of - * instructions that are needed to set up the loop and then disabled the - * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR. + * instructions that are needed to set up the loop and then disable the + * counter. 2 MOV, 2 XOR, 1 WRMSR. */ -#define NUM_EXTRA_INSNS 7 -#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS) +#define NUM_EXTRA_INSNS 5 + +/* Total number of instructions retired within the measured section. */ +#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) + static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; @@ -100,7 +113,7 @@ static void guest_assert_event_count(uint8_t idx, GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCHES); + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -120,7 +133,7 @@ static void guest_assert_event_count(uint8_t idx, } sanity_checks: - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); GUEST_ASSERT_EQ(_rdpmc(pmc), count); wrmsr(pmc_msr, 0xdead); @@ -134,8 +147,8 @@ sanity_checks: * before the end of the sequence. * * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the - * start of the loop to force LLC references and misses, i.e. to allow testing - * that those events actually count. + * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and + * misses, i.e. to allow testing that those events actually count. * * If forced emulation is enabled (and specified), force emulation on a subset * of the measured code to verify that KVM correctly emulates instructions and @@ -145,10 +158,11 @@ sanity_checks: #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ do { \ __asm__ __volatile__("wrmsr\n\t" \ + " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ + "1:\n\t" \ clflush "\n\t" \ "mfence\n\t" \ - "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \ - FEP "loop .\n\t" \ + FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ @@ -163,9 +177,9 @@ do { \ wrmsr(pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \ @@ -500,7 +514,7 @@ static void guest_test_fixed_counters(void) wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 26b3e7efe5dd..c15513cd74d1 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -32,8 +32,8 @@ struct __kvm_pmu_event_filter { /* * This event list comprises Intel's known architectural events, plus AMD's - * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs). - * Note, AMD and Intel use the same encoding for instructions retired. + * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the + * same encoding for Instructions Retired. */ kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); @@ -353,38 +353,13 @@ static bool use_intel_pmu(void) kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t family, uint32_t model) -{ - return family == 0x17 && model <= 0x0f; -} - -static bool is_zen2(uint32_t family, uint32_t model) -{ - return family == 0x17 && model >= 0x30 && model <= 0x3f; -} - -static bool is_zen3(uint32_t family, uint32_t model) -{ - return family == 0x19 && model <= 0x0f; -} - /* - * Determining AMD support for a PMU event requires consulting the AMD - * PPR for the CPU or reference material derived therefrom. The AMD - * test code herein has been verified to work on Zen1, Zen2, and Zen3. - * - * Feel free to add more AMD CPUs that are documented to support event - * select 0xc2 umask 0 as "retired branch instructions." + * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding + * 0xc2,0 for Branch Instructions Retired. */ static bool use_amd_pmu(void) { - uint32_t family = kvm_cpu_family(); - uint32_t model = kvm_cpu_model(); - - return host_cpu_is_amd && - (is_zen1(family, model) || - is_zen2(family, model) || - is_zen3(family, model)); + return host_cpu_is_amd && kvm_cpu_family() >= 0x17; } /* diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index d691d86e5bc3..49913784bc82 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -33,6 +33,20 @@ static void guest_not_bsp_vcpu(void *arg) GUEST_DONE(); } +static void test_set_invalid_bsp(struct kvm_vm *vm) +{ + unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + int r; + + if (max_vcpu_id) { + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail"); + } + + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail"); +} + static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) { int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, @@ -80,6 +94,8 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, vm = vm_create(nr_vcpus); + test_set_invalid_bsp(vm); + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 7b299ed5ff45..d6edcfcb5be8 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -196,6 +196,9 @@ endef clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir) $(CLEAN) +# Build with _GNU_SOURCE by default +CFLAGS += -D_GNU_SOURCE= + # Enables to extend CFLAGS and LDFLAGS from command line, e.g. # make USERCFLAGS=-Werror USERLDFLAGS=-static CFLAGS += $(USERCFLAGS) diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index e3455a6b1158..65c9c058458d 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -4,7 +4,9 @@ . $(dirname $0)/functions.sh -MOD_LIVEPATCH=test_klp_livepatch +MOD_LIVEPATCH1=test_klp_livepatch +MOD_LIVEPATCH2=test_klp_syscall +MOD_LIVEPATCH3=test_klp_callbacks_demo MOD_REPLACE=test_klp_atomic_replace setup_config @@ -16,33 +18,33 @@ setup_config start_test "basic function patching" -load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH1 -if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then +if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH1: this has been live patched" ]] ; then echo -e "FAIL\n\n" die "livepatch kselftest(s) failed" fi -disable_lp $MOD_LIVEPATCH -unload_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH1 +unload_lp $MOD_LIVEPATCH1 -if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then +if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH1: this has been live patched" ]] ; then echo -e "FAIL\n\n" die "livepatch kselftest(s) failed" fi -check_result "% insmod test_modules/$MOD_LIVEPATCH.ko -livepatch: enabling patch '$MOD_LIVEPATCH' -livepatch: '$MOD_LIVEPATCH': initializing patching transition -livepatch: '$MOD_LIVEPATCH': starting patching transition -livepatch: '$MOD_LIVEPATCH': completing patching transition -livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled -livepatch: '$MOD_LIVEPATCH': initializing unpatching transition -livepatch: '$MOD_LIVEPATCH': starting unpatching transition -livepatch: '$MOD_LIVEPATCH': completing unpatching transition -livepatch: '$MOD_LIVEPATCH': unpatching complete -% rmmod $MOD_LIVEPATCH" +check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko +livepatch: enabling patch '$MOD_LIVEPATCH1' +livepatch: '$MOD_LIVEPATCH1': initializing patching transition +livepatch: '$MOD_LIVEPATCH1': starting patching transition +livepatch: '$MOD_LIVEPATCH1': completing patching transition +livepatch: '$MOD_LIVEPATCH1': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH1': starting unpatching transition +livepatch: '$MOD_LIVEPATCH1': completing unpatching transition +livepatch: '$MOD_LIVEPATCH1': unpatching complete +% rmmod $MOD_LIVEPATCH1" # - load a livepatch that modifies the output from /proc/cmdline and @@ -53,7 +55,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete start_test "multiple livepatches" -load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg @@ -69,26 +71,26 @@ unload_lp $MOD_REPLACE grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -disable_lp $MOD_LIVEPATCH -unload_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH1 +unload_lp $MOD_LIVEPATCH1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -check_result "% insmod test_modules/$MOD_LIVEPATCH.ko -livepatch: enabling patch '$MOD_LIVEPATCH' -livepatch: '$MOD_LIVEPATCH': initializing patching transition -livepatch: '$MOD_LIVEPATCH': starting patching transition -livepatch: '$MOD_LIVEPATCH': completing patching transition -livepatch: '$MOD_LIVEPATCH': patching complete -$MOD_LIVEPATCH: this has been live patched +check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko +livepatch: enabling patch '$MOD_LIVEPATCH1' +livepatch: '$MOD_LIVEPATCH1': initializing patching transition +livepatch: '$MOD_LIVEPATCH1': starting patching transition +livepatch: '$MOD_LIVEPATCH1': completing patching transition +livepatch: '$MOD_LIVEPATCH1': patching complete +$MOD_LIVEPATCH1: this has been live patched % insmod test_modules/$MOD_REPLACE.ko replace=0 livepatch: enabling patch '$MOD_REPLACE' livepatch: '$MOD_REPLACE': initializing patching transition livepatch: '$MOD_REPLACE': starting patching transition livepatch: '$MOD_REPLACE': completing patching transition livepatch: '$MOD_REPLACE': patching complete -$MOD_LIVEPATCH: this has been live patched +$MOD_LIVEPATCH1: this has been live patched $MOD_REPLACE: this has been live patched % echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition @@ -96,35 +98,57 @@ livepatch: '$MOD_REPLACE': starting unpatching transition livepatch: '$MOD_REPLACE': completing unpatching transition livepatch: '$MOD_REPLACE': unpatching complete % rmmod $MOD_REPLACE -$MOD_LIVEPATCH: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled -livepatch: '$MOD_LIVEPATCH': initializing unpatching transition -livepatch: '$MOD_LIVEPATCH': starting unpatching transition -livepatch: '$MOD_LIVEPATCH': completing unpatching transition -livepatch: '$MOD_LIVEPATCH': unpatching complete -% rmmod $MOD_LIVEPATCH" +$MOD_LIVEPATCH1: this has been live patched +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH1': starting unpatching transition +livepatch: '$MOD_LIVEPATCH1': completing unpatching transition +livepatch: '$MOD_LIVEPATCH1': unpatching complete +% rmmod $MOD_LIVEPATCH1" # - load a livepatch that modifies the output from /proc/cmdline and # verify correct behavior -# - load an atomic replace livepatch and verify that only the second is active -# - remove the first livepatch and verify that the atomic replace livepatch -# is still active +# - load two additional livepatches and check the number of livepatch modules +# applied +# - load an atomic replace livepatch and check that the other three modules were +# disabled +# - remove all livepatches besides the atomic replace one and verify that the +# atomic replace livepatch is still active # - remove the atomic replace livepatch and verify that none are active start_test "atomic replace livepatch" -load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg +for mod in $MOD_LIVEPATCH2 $MOD_LIVEPATCH3; do + load_lp "$mod" +done + +mods=(/sys/kernel/livepatch/*) +nmods=${#mods[@]} +if [ "$nmods" -ne 3 ]; then + die "Expecting three modules listed, found $nmods" +fi + load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -unload_lp $MOD_LIVEPATCH +mods=(/sys/kernel/livepatch/*) +nmods=${#mods[@]} +if [ "$nmods" -ne 1 ]; then + die "Expecting only one moduled listed, found $nmods" +fi + +# These modules were disabled by the atomic replace +for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do + unload_lp "$mod" +done grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg @@ -135,13 +159,27 @@ unload_lp $MOD_REPLACE grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -check_result "% insmod test_modules/$MOD_LIVEPATCH.ko -livepatch: enabling patch '$MOD_LIVEPATCH' -livepatch: '$MOD_LIVEPATCH': initializing patching transition -livepatch: '$MOD_LIVEPATCH': starting patching transition -livepatch: '$MOD_LIVEPATCH': completing patching transition -livepatch: '$MOD_LIVEPATCH': patching complete -$MOD_LIVEPATCH: this has been live patched +check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko +livepatch: enabling patch '$MOD_LIVEPATCH1' +livepatch: '$MOD_LIVEPATCH1': initializing patching transition +livepatch: '$MOD_LIVEPATCH1': starting patching transition +livepatch: '$MOD_LIVEPATCH1': completing patching transition +livepatch: '$MOD_LIVEPATCH1': patching complete +$MOD_LIVEPATCH1: this has been live patched +% insmod test_modules/$MOD_LIVEPATCH2.ko +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +livepatch: '$MOD_LIVEPATCH2': patching complete +% insmod test_modules/$MOD_LIVEPATCH3.ko +livepatch: enabling patch '$MOD_LIVEPATCH3' +livepatch: '$MOD_LIVEPATCH3': initializing patching transition +$MOD_LIVEPATCH3: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH3': starting patching transition +livepatch: '$MOD_LIVEPATCH3': completing patching transition +$MOD_LIVEPATCH3: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH3': patching complete % insmod test_modules/$MOD_REPLACE.ko replace=1 livepatch: enabling patch '$MOD_REPLACE' livepatch: '$MOD_REPLACE': initializing patching transition @@ -149,7 +187,9 @@ livepatch: '$MOD_REPLACE': starting patching transition livepatch: '$MOD_REPLACE': completing patching transition livepatch: '$MOD_REPLACE': patching complete $MOD_REPLACE: this has been live patched -% rmmod $MOD_LIVEPATCH +% rmmod $MOD_LIVEPATCH3 +% rmmod $MOD_LIVEPATCH2 +% rmmod $MOD_LIVEPATCH1 $MOD_REPLACE: this has been live patched % echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh index b76a881d4013..289eb7d4c4b3 100755 --- a/tools/testing/selftests/livepatch/test-syscall.sh +++ b/tools/testing/selftests/livepatch/test-syscall.sh @@ -15,7 +15,10 @@ setup_config start_test "patch getpid syscall while being heavily hammered" -for i in $(seq 1 $(getconf _NPROCESSORS_ONLN)); do +NPROC=$(getconf _NPROCESSORS_ONLN) +MAXPROC=128 + +for i in $(seq 1 $(($NPROC < $MAXPROC ? $NPROC : $MAXPROC))); do ./test_klp-call_getpid & pids[$i]="$!" done diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 6c646afa7395..05a14f5a7bfb 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -18,6 +18,7 @@ check_sysfs_rights "$MOD_LIVEPATCH" "" "drwxr-xr-x" check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--" check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1" check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------" +check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--" check_sysfs_value "$MOD_LIVEPATCH" "transition" "0" check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--" @@ -83,4 +84,51 @@ test_klp_callbacks_demo: post_unpatch_callback: vmlinux livepatch: 'test_klp_callbacks_demo': unpatching complete % rmmod test_klp_callbacks_demo" +start_test "sysfs test replace enabled" + +MOD_LIVEPATCH=test_klp_atomic_replace +load_lp $MOD_LIVEPATCH replace=1 + +check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "replace" "1" + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=1 +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + +start_test "sysfs test replace disabled" + +load_lp $MOD_LIVEPATCH replace=0 + +check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "replace" "0" + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=0 +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + exit 0 diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 0b9ab987601c..da030b43e43b 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -6,6 +6,7 @@ hugepage-shm hugepage-vmemmap hugetlb-madvise hugetlb-read-hwpoison +hugetlb-soft-offline khugepaged map_hugetlb map_populate @@ -49,3 +50,4 @@ hugetlb_fault_after_madv hugetlb_madv_vs_map mseal_test seal_elf +droppable diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 3b49bc3d0a3b..901e0d07765b 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -2,6 +2,7 @@ # Makefile for mm selftests LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h +LOCAL_HDRS += $(selfdir)/mm/mseal_helpers.h include local_config.mk @@ -42,6 +43,7 @@ TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugetlb-madvise TEST_GEN_FILES += hugetlb-read-hwpoison +TEST_GEN_FILES += hugetlb-soft-offline TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm @@ -73,6 +75,8 @@ TEST_GEN_FILES += ksm_functional_tests TEST_GEN_FILES += mdwe_test TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map +TEST_GEN_FILES += hugetlb_dio +TEST_GEN_FILES += droppable ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c new file mode 100644 index 000000000000..f3d9ecf96890 --- /dev/null +++ b/tools/testing/selftests/mm/droppable.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <sys/mman.h> +#include <linux/mman.h> + +#include "../kselftest.h" + +int main(int argc, char *argv[]) +{ + size_t alloc_size = 134217728; + size_t page_size = getpagesize(); + void *alloc; + pid_t child; + + ksft_print_header(); + ksft_set_plan(1); + + alloc = mmap(0, alloc_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0); + assert(alloc != MAP_FAILED); + memset(alloc, 'A', alloc_size); + for (size_t i = 0; i < alloc_size; i += page_size) + assert(*(uint8_t *)(alloc + i)); + + child = fork(); + assert(child >= 0); + if (!child) { + for (;;) + *(char *)malloc(page_size) = 'B'; + } + + for (bool done = false; !done;) { + for (size_t i = 0; i < alloc_size; i += page_size) { + if (!*(uint8_t *)(alloc + i)) { + done = true; + break; + } + } + } + kill(child, SIGTERM); + + ksft_test_result_pass("MAP_DROPPABLE: PASS\n"); + exit(KSFT_PASS); +} diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index c463d1c09c9b..ada9156cc497 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <sys/mman.h> #include <errno.h> #include <fcntl.h> /* Definition of O_* constants */ diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c new file mode 100644 index 000000000000..f086f0e04756 --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test soft offline behavior for HugeTLB pages: + * - if enable_soft_offline = 0, hugepages should stay intact and soft + * offlining failed with EOPNOTSUPP. + * - if enable_soft_offline = 1, a hugepage should be dissolved and + * nr_hugepages/free_hugepages should be reduced by 1. + * + * Before running, make sure more than 2 hugepages of default_hugepagesz + * are allocated. For example, if /proc/meminfo/Hugepagesize is 2048kB: + * echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <linux/magic.h> +#include <linux/memfd.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <sys/types.h> + +#include "../kselftest.h" + +#ifndef MADV_SOFT_OFFLINE +#define MADV_SOFT_OFFLINE 101 +#endif + +#define EPREFIX " !!! " + +static int do_soft_offline(int fd, size_t len, int expect_errno) +{ + char *filemap = NULL; + char *hwp_addr = NULL; + const unsigned long pagesize = getpagesize(); + int ret = 0; + + if (ftruncate(fd, len) < 0) { + ksft_perror(EPREFIX "ftruncate to len failed"); + return -1; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + ksft_perror(EPREFIX "mmap failed"); + ret = -1; + goto untruncate; + } + + memset(filemap, 0xab, len); + ksft_print_msg("Allocated %#lx bytes of hugetlb pages\n", len); + + hwp_addr = filemap + len / 2; + ret = madvise(hwp_addr, pagesize, MADV_SOFT_OFFLINE); + ksft_print_msg("MADV_SOFT_OFFLINE %p ret=%d, errno=%d\n", + hwp_addr, ret, errno); + if (ret != 0) + ksft_perror(EPREFIX "madvise failed"); + + if (errno == expect_errno) + ret = 0; + else { + ksft_print_msg("MADV_SOFT_OFFLINE should ret %d\n", + expect_errno); + ret = -1; + } + + munmap(filemap, len); +untruncate: + if (ftruncate(fd, 0) < 0) + ksft_perror(EPREFIX "ftruncate back to 0 failed"); + + return ret; +} + +static int set_enable_soft_offline(int value) +{ + char cmd[256] = {0}; + FILE *cmdfile = NULL; + + if (value != 0 && value != 1) + return -EINVAL; + + sprintf(cmd, "echo %d > /proc/sys/vm/enable_soft_offline", value); + cmdfile = popen(cmd, "r"); + + if (cmdfile) + ksft_print_msg("enable_soft_offline => %d\n", value); + else { + ksft_perror(EPREFIX "failed to set enable_soft_offline"); + return errno; + } + + pclose(cmdfile); + return 0; +} + +static int read_nr_hugepages(unsigned long hugepage_size, + unsigned long *nr_hugepages) +{ + char buffer[256] = {0}; + char cmd[256] = {0}; + + sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages", + hugepage_size); + FILE *cmdfile = popen(cmd, "r"); + + if (cmdfile == NULL) { + ksft_perror(EPREFIX "failed to popen nr_hugepages"); + return -1; + } + + if (!fgets(buffer, sizeof(buffer), cmdfile)) { + ksft_perror(EPREFIX "failed to read nr_hugepages"); + pclose(cmdfile); + return -1; + } + + *nr_hugepages = atoll(buffer); + pclose(cmdfile); + return 0; +} + +static int create_hugetlbfs_file(struct statfs *file_stat) +{ + int fd; + + fd = memfd_create("hugetlb_tmp", MFD_HUGETLB); + if (fd < 0) { + ksft_perror(EPREFIX "could not open hugetlbfs file"); + return -1; + } + + memset(file_stat, 0, sizeof(*file_stat)); + if (fstatfs(fd, file_stat)) { + ksft_perror(EPREFIX "fstatfs failed"); + goto close; + } + if (file_stat->f_type != HUGETLBFS_MAGIC) { + ksft_print_msg(EPREFIX "not hugetlbfs file\n"); + goto close; + } + + return fd; +close: + close(fd); + return -1; +} + +static void test_soft_offline_common(int enable_soft_offline) +{ + int fd; + int expect_errno = enable_soft_offline ? 0 : EOPNOTSUPP; + struct statfs file_stat; + unsigned long hugepagesize_kb = 0; + unsigned long nr_hugepages_before = 0; + unsigned long nr_hugepages_after = 0; + int ret; + + ksft_print_msg("Test soft-offline when enabled_soft_offline=%d\n", + enable_soft_offline); + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + ksft_exit_fail_msg("Failed to create hugetlbfs file\n"); + + hugepagesize_kb = file_stat.f_bsize / 1024; + ksft_print_msg("Hugepagesize is %ldkB\n", hugepagesize_kb); + + if (set_enable_soft_offline(enable_soft_offline) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to set enable_soft_offline\n"); + } + + if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to read nr_hugepages\n"); + } + + ksft_print_msg("Before MADV_SOFT_OFFLINE nr_hugepages=%ld\n", + nr_hugepages_before); + + ret = do_soft_offline(fd, 2 * file_stat.f_bsize, expect_errno); + + if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to read nr_hugepages\n"); + } + + ksft_print_msg("After MADV_SOFT_OFFLINE nr_hugepages=%ld\n", + nr_hugepages_after); + + // No need for the hugetlbfs file from now on. + close(fd); + + if (enable_soft_offline) { + if (nr_hugepages_before != nr_hugepages_after + 1) { + ksft_test_result_fail("MADV_SOFT_OFFLINE should reduced 1 hugepage\n"); + return; + } + } else { + if (nr_hugepages_before != nr_hugepages_after) { + ksft_test_result_fail("MADV_SOFT_OFFLINE reduced %lu hugepages\n", + nr_hugepages_before - nr_hugepages_after); + return; + } + } + + ksft_test_result(ret == 0, + "Test soft-offline when enabled_soft_offline=%d\n", + enable_soft_offline); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(2); + + test_soft_offline_common(1); + test_soft_offline_common(0); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c new file mode 100644 index 000000000000..f9ac20c657ec --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program tests for hugepage leaks after DIO writes to a file using a + * hugepage as the user buffer. During DIO, the user buffer is pinned and + * should be properly unpinned upon completion. This patch verifies that the + * kernel correctly unpins the buffer at DIO completion for both aligned and + * unaligned user buffer offsets (w.r.t page boundary), ensuring the hugepage + * is freed upon unmapping. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> +#include "vm_util.h" +#include "../kselftest.h" + +void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) +{ + int fd; + char *buffer = NULL; + char *orig_buffer = NULL; + size_t h_pagesize = 0; + size_t writesize; + int free_hpage_b = 0; + int free_hpage_a = 0; + const int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; + const int mmap_prot = PROT_READ | PROT_WRITE; + + writesize = end_off - start_off; + + /* Get the default huge page size */ + h_pagesize = default_huge_page_size(); + if (!h_pagesize) + ksft_exit_fail_msg("Unable to determine huge page size\n"); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_fail_perror("Error opening file\n"); + + /* Get the free huge pages before allocation */ + free_hpage_b = get_free_hugepages(); + if (free_hpage_b == 0) { + close(fd); + ksft_exit_skip("No free hugepage, exiting!\n"); + } + + /* Allocate a hugetlb page */ + orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0); + if (orig_buffer == MAP_FAILED) { + close(fd); + ksft_exit_fail_perror("Error mapping memory\n"); + } + buffer = orig_buffer; + buffer += start_off; + + memset(buffer, 'A', writesize); + + /* Write the buffer to the file */ + if (write(fd, buffer, writesize) != (writesize)) { + munmap(orig_buffer, h_pagesize); + close(fd); + ksft_exit_fail_perror("Error writing to file\n"); + } + + /* unmap the huge page */ + munmap(orig_buffer, h_pagesize); + close(fd); + + /* Get the free huge pages after unmap*/ + free_hpage_a = get_free_hugepages(); + + /* + * If the no. of free hugepages before allocation and after unmap does + * not match - that means there could still be a page which is pinned. + */ + if (free_hpage_a != free_hpage_b) { + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + ksft_test_result_fail(": Huge pages not freed!\n"); + } else { + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + ksft_test_result_pass(": Huge pages freed successfully !\n"); + } +} + +int main(void) +{ + size_t pagesize = 0; + + ksft_print_header(); + ksft_set_plan(4); + + /* Get base page size */ + pagesize = psize(); + + /* start and end is aligned to pagesize */ + run_dio_using_hugetlb(0, (pagesize * 3)); + + /* start is aligned but end is not aligned */ + run_dio_using_hugetlb(0, (pagesize * 3) - (pagesize / 2)); + + /* start is unaligned and end is aligned */ + run_dio_using_hugetlb(pagesize / 2, (pagesize * 3)); + + /* both start and end are unaligned */ + run_dio_using_hugetlb(pagesize / 2, (pagesize * 3) + (pagesize / 2)); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index b61803e36d1c..66b4e111b5a2 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include <string.h> #include <stdbool.h> #include <stdint.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <errno.h> #include <fcntl.h> #include <sys/mman.h> @@ -369,7 +369,6 @@ unmap: munmap(map, size); } -#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -430,7 +429,6 @@ close_uffd: unmap: munmap(map, size); } -#endif /* Verify that KSM can be enabled / queried with prctl. */ static void test_prctl(void) @@ -686,9 +684,7 @@ int main(int argc, char **argv) exit(test_child_ksm()); } -#ifdef __NR_userfaultfd tests++; -#endif ksft_print_header(); ksft_set_plan(tests); @@ -700,9 +696,7 @@ int main(int argc, char **argv) test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); -#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); -#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 9a0597310a76..74c911aa3aea 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <string.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <errno.h> #include <stdio.h> #include <fcntl.h> @@ -28,8 +28,6 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) -#ifdef __NR_memfd_secret - #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -334,13 +332,3 @@ int main(int argc, char *argv[]) ksft_finished(); } - -#else /* __NR_memfd_secret */ - -int main(int argc, char *argv[]) -{ - printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); - return KSFT_SKIP; -} - -#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index b8a7efe9204e..1db134063c38 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include <fcntl.h> #include <signal.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <string.h> #include <errno.h> #include <stdlib.h> @@ -265,7 +265,6 @@ munmap: munmap(mmap_mem, mmap_size); } -#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -322,7 +321,6 @@ munmap: munmap(dst, pagesize); free(src); } -#endif /* __NR_userfaultfd */ int main(void) { @@ -335,9 +333,7 @@ int main(void) thpsize / 1024); tests += 3; } -#ifdef __NR_userfaultfd tests += 1; -#endif /* __NR_userfaultfd */ ksft_print_header(); ksft_set_plan(tests); @@ -367,9 +363,7 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. */ -#ifdef __NR_userfaultfd test_uffdio_copy(); -#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..1e5731bab499 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,6 +3,7 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> +#include <asm-generic/unistd.h> static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/mseal_helpers.h b/tools/testing/selftests/mm/mseal_helpers.h new file mode 100644 index 000000000000..0cfce31c76d2 --- /dev/null +++ b/tools/testing/selftests/mm/mseal_helpers.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define FAIL_TEST_IF_FALSE(test_passed) \ + do { \ + if (!(test_passed)) { \ + ksft_test_result_fail("%s: line:%d\n", \ + __func__, __LINE__); \ + return; \ + } \ + } while (0) + +#define SKIP_TEST_IF_FALSE(test_passed) \ + do { \ + if (!(test_passed)) { \ + ksft_test_result_skip("%s: line:%d\n", \ + __func__, __LINE__); \ + return; \ + } \ + } while (0) + +#define REPORT_TEST_PASS() ksft_test_result_pass("%s\n", __func__) + +#ifndef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 +#endif + +#ifndef PKEY_BITS_PER_PKEY +#define PKEY_BITS_PER_PKEY 2 +#endif + +#ifndef PKEY_MASK +#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif + +#ifndef u64 +#define u64 unsigned long long +#endif diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index 41998cf1dcf5..a818f010de47 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -3,7 +3,7 @@ #include <linux/mman.h> #include <sys/mman.h> #include <stdint.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <string.h> #include <sys/time.h> #include <sys/resource.h> @@ -17,54 +17,7 @@ #include <sys/ioctl.h> #include <sys/vfs.h> #include <sys/stat.h> - -/* - * need those definition for manually build using gcc. - * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 mseal_test.c -o mseal_test - */ -#ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -#endif - -#ifndef PKEY_DISABLE_WRITE -# define PKEY_DISABLE_WRITE 0x2 -#endif - -#ifndef PKEY_BITS_PER_PKEY -#define PKEY_BITS_PER_PKEY 2 -#endif - -#ifndef PKEY_MASK -#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) -#endif - -#define FAIL_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - -#define SKIP_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - - -#define TEST_END_CHECK() {\ - ksft_test_result_pass("%s\n", __func__);\ - return;\ -test_end:\ - return;\ -} - -#ifndef u64 -#define u64 unsigned long long -#endif +#include "mseal_helpers.h" static unsigned long get_vma_size(void *addr, int *prot) { @@ -287,7 +240,7 @@ static void test_seal_addseal(void) ret = sys_mseal(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_unmapped_start(void) @@ -315,7 +268,7 @@ static void test_seal_unmapped_start(void) ret = sys_mseal(ptr + 2 * page_size, 2 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_unmapped_middle(void) @@ -347,7 +300,7 @@ static void test_seal_unmapped_middle(void) ret = sys_mseal(ptr + 3 * page_size, page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_unmapped_end(void) @@ -376,7 +329,7 @@ static void test_seal_unmapped_end(void) ret = sys_mseal(ptr, 2 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_multiple_vmas(void) @@ -407,7 +360,7 @@ static void test_seal_multiple_vmas(void) ret = sys_mseal(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_split_start(void) @@ -432,7 +385,7 @@ static void test_seal_split_start(void) ret = sys_mseal(ptr + page_size, 3 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_split_end(void) @@ -457,7 +410,7 @@ static void test_seal_split_end(void) ret = sys_mseal(ptr, 3 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_invalid_input(void) @@ -492,7 +445,7 @@ static void test_seal_invalid_input(void) ret = sys_mseal(ptr - page_size, 5 * page_size); FAIL_TEST_IF_FALSE(ret < 0); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_zero_length(void) @@ -516,7 +469,7 @@ static void test_seal_zero_length(void) ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_zero_address(void) @@ -542,7 +495,7 @@ static void test_seal_zero_address(void) ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_twice(void) @@ -562,7 +515,7 @@ static void test_seal_twice(void) ret = sys_mseal(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect(bool seal) @@ -586,7 +539,7 @@ static void test_seal_mprotect(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_start_mprotect(bool seal) @@ -616,7 +569,7 @@ static void test_seal_start_mprotect(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_end_mprotect(bool seal) @@ -646,7 +599,7 @@ static void test_seal_end_mprotect(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_unalign_len(bool seal) @@ -675,7 +628,7 @@ static void test_seal_mprotect_unalign_len(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_unalign_len_variant_2(bool seal) @@ -703,7 +656,7 @@ static void test_seal_mprotect_unalign_len_variant_2(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_two_vma(bool seal) @@ -738,7 +691,7 @@ static void test_seal_mprotect_two_vma(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_two_vma_with_split(bool seal) @@ -785,7 +738,7 @@ static void test_seal_mprotect_two_vma_with_split(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_partial_mprotect(bool seal) @@ -811,7 +764,7 @@ static void test_seal_mprotect_partial_mprotect(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_two_vma_with_gap(bool seal) @@ -854,7 +807,7 @@ static void test_seal_mprotect_two_vma_with_gap(bool seal) ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ); FAIL_TEST_IF_FALSE(ret == 0); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_split(bool seal) @@ -891,7 +844,7 @@ static void test_seal_mprotect_split(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_merge(bool seal) @@ -925,7 +878,7 @@ static void test_seal_mprotect_merge(bool seal) ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ); FAIL_TEST_IF_FALSE(ret == 0); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_munmap(bool seal) @@ -950,7 +903,7 @@ static void test_seal_munmap(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } /* @@ -990,7 +943,7 @@ static void test_seal_munmap_two_vma(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } /* @@ -1028,7 +981,7 @@ static void test_seal_munmap_vma_with_gap(bool seal) ret = sys_munmap(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_munmap_start_freed(bool seal) @@ -1068,7 +1021,7 @@ static void test_munmap_start_freed(bool seal) FAIL_TEST_IF_FALSE(size == 0); } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_munmap_end_freed(bool seal) @@ -1098,7 +1051,7 @@ static void test_munmap_end_freed(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_munmap_middle_freed(bool seal) @@ -1142,7 +1095,7 @@ static void test_munmap_middle_freed(bool seal) FAIL_TEST_IF_FALSE(size == 0); } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_shrink(bool seal) @@ -1171,7 +1124,7 @@ static void test_seal_mremap_shrink(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_expand(bool seal) @@ -1203,7 +1156,7 @@ static void test_seal_mremap_expand(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move(bool seal) @@ -1236,7 +1189,7 @@ static void test_seal_mremap_move(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mmap_overwrite_prot(bool seal) @@ -1264,7 +1217,7 @@ static void test_seal_mmap_overwrite_prot(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == ptr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mmap_expand(bool seal) @@ -1295,7 +1248,7 @@ static void test_seal_mmap_expand(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == ptr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mmap_shrink(bool seal) @@ -1323,7 +1276,7 @@ static void test_seal_mmap_shrink(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == ptr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_shrink_fixed(bool seal) @@ -1354,7 +1307,7 @@ static void test_seal_mremap_shrink_fixed(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == newAddr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_expand_fixed(bool seal) @@ -1385,7 +1338,7 @@ static void test_seal_mremap_expand_fixed(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == newAddr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_fixed(bool seal) @@ -1415,7 +1368,7 @@ static void test_seal_mremap_move_fixed(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == newAddr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_fixed_zero(bool seal) @@ -1447,7 +1400,7 @@ static void test_seal_mremap_move_fixed_zero(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_dontunmap(bool seal) @@ -1476,7 +1429,7 @@ static void test_seal_mremap_move_dontunmap(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) @@ -1510,7 +1463,7 @@ static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } @@ -1603,7 +1556,7 @@ static void test_seal_merge_and_split(void) FAIL_TEST_IF_FALSE(size == 22 * page_size); FAIL_TEST_IF_FALSE(prot == 0x4); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_rw(bool seal) @@ -1632,7 +1585,7 @@ static void test_seal_discard_ro_anon_on_rw(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_pkey(bool seal) @@ -1679,7 +1632,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_filebacked(bool seal) @@ -1716,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal) FAIL_TEST_IF_FALSE(!ret); close(fd); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_shared(bool seal) @@ -1745,7 +1698,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon(bool seal) @@ -1775,7 +1728,7 @@ static void test_seal_discard_ro_anon(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 2d785aca72a5..fc90af2a97b8 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -15,7 +15,7 @@ #include <sys/ioctl.h> #include <sys/stat.h> #include <math.h> -#include <asm/unistd.h> +#include <asm-generic/unistd.h> #include <pthread.h> #include <sys/resource.h> #include <assert.h> @@ -1567,8 +1567,10 @@ int main(int argc, char *argv[]) /* 7. File Hugetlb testing */ mem_size = 2*1024*1024; fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL); + if (fd < 0) + ksft_exit_fail_msg("uffd-test creation failed %d %s\n", errno, strerror(errno)); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (mem) { + if (mem != MAP_FAILED) { wp_init(mem, mem_size); wp_addr_range(mem, mem_size); diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 48dc151f8fca..eaa6d1fc5328 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <sys/ptrace.h> #include <setjmp.h> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 3157204b9047..03ac4f2e1cce 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -265,6 +265,7 @@ CATEGORY="hugetlb" run_test ./map_hugetlb CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +CATEGORY="hugetlb" run_test ./hugetlb_dio nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) # For this test, we need one and just one huge page @@ -331,6 +332,12 @@ CATEGORY="hugetlb" run_test ./thuge-gen CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2 CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2 if $RUN_DESTRUCTIVE; then +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) +enable_soft_offline=$(cat /proc/sys/vm/enable_soft_offline) +echo 8 > /proc/sys/vm/nr_hugepages +CATEGORY="hugetlb" run_test ./hugetlb-soft-offline +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +echo "$enable_soft_offline" > /proc/sys/vm/enable_soft_offline CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison fi diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c index f2babec79bb6..7aa1366063e4 100644 --- a/tools/testing/selftests/mm/seal_elf.c +++ b/tools/testing/selftests/mm/seal_elf.c @@ -2,7 +2,7 @@ #define _GNU_SOURCE #include <sys/mman.h> #include <stdint.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <string.h> #include <sys/time.h> #include <sys/resource.h> @@ -16,38 +16,7 @@ #include <sys/ioctl.h> #include <sys/vfs.h> #include <sys/stat.h> - -/* - * need those definition for manually build using gcc. - * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 seal_elf.c -o seal_elf - */ -#define FAIL_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - -#define SKIP_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - - -#define TEST_END_CHECK() {\ - ksft_test_result_pass("%s\n", __func__);\ - return;\ -test_end:\ - return;\ -} - -#ifndef u64 -#define u64 unsigned long long -#endif +#include "mseal_helpers.h" /* * define sys_xyx to call syscall directly. @@ -158,7 +127,7 @@ static void test_seal_elf(void) FAIL_TEST_IF_FALSE(ret < 0); ksft_print_msg("somestr is sealed, mprotect is rejected\n"); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index d3c7f5fb3e7b..e5e8dafc9d94 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int __attribute__((unused)) dummy = 0; + int dummy = 0; srand(time(NULL)); @@ -341,6 +341,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, for (size_t i = 0; i < fd_size; i++) dummy += *(*addr + i); + asm volatile("" : "+r" (dummy)); if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index ea7fd8fe2876..e4370b79b62f 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -13,8 +13,9 @@ sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m (warning this will remove all if someone else uses them) */ -#define _GNU_SOURCE 1 +#define _GNU_SOURCE #include <sys/mman.h> +#include <linux/mman.h> #include <stdlib.h> #include <stdio.h> #include <sys/ipc.h> @@ -28,19 +29,23 @@ #include "vm_util.h" #include "../kselftest.h" -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f #if !defined(MAP_HUGETLB) #define MAP_HUGETLB 0x40000 #endif #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#ifndef SHM_HUGE_SHIFT #define SHM_HUGE_SHIFT 26 +#endif +#ifndef SHM_HUGE_MASK #define SHM_HUGE_MASK 0x3f +#endif +#ifndef SHM_HUGE_2MB #define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#endif +#ifndef SHM_HUGE_1GB #define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) +#endif #define NUM_PAGESIZES 5 #define NUM_PAGES 4 diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 7ad6ba660c7d..717539eddf98 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,11 +673,7 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { -#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); -#else - return -1; -#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index f78bab0f3d45..a4b83280998a 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,10 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). */ - +#include <asm-generic/unistd.h> #include "uffd-common.h" -#ifdef __NR_userfaultfd +uint64_t features; #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -247,10 +247,14 @@ static int userfaultfd_stress(void) unsigned long nr; struct uffd_args args[nr_cpus]; uint64_t mem_size = nr_pages * page_size; + int flags = 0; memset(args, 0, sizeof(struct uffd_args) * nr_cpus); - if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL)) + if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + flags = UFFD_FEATURE_WP_UNPOPULATED; + + if (uffd_test_ctx_init(flags, NULL)) err("context init failed"); if (posix_memalign(&area, page_size, page_size)) @@ -385,8 +389,6 @@ static void set_test_type(const char *type) static void parse_test_type_arg(const char *raw_type) { - uint64_t features = UFFD_API_FEATURES; - set_test_type(raw_type); if (!test_type) @@ -409,12 +411,15 @@ static void parse_test_type_arg(const char *raw_type) * feature. */ - if (userfaultfd_open(&features)) - err("Userfaultfd open failed"); + if (uffd_get_features(&features)) + err("failed to get available features"); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); + if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + test_uffdio_wp = false; + close(uffd); uffd = -1; } @@ -466,15 +471,3 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } - -#else /* __NR_userfaultfd */ - -#warning "missing __NR_userfaultfd definition" - -int main(void) -{ - printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return KSFT_SKIP; -} - -#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 21ec23206ab4..b3d21eed203d 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,12 +5,11 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ +#include <asm-generic/unistd.h> #include "uffd-common.h" #include "../../../../mm/gup_test.h" -#ifdef __NR_userfaultfd - /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1554,14 +1553,3 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } -#else /* __NR_userfaultfd */ - -#warning "missing __NR_userfaultfd definition" - -int main(void) -{ - printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); - return KSFT_SKIP; -} - -#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index cfbc501290d3..fa7eabfaf841 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -9,26 +9,9 @@ #include <sys/mman.h> #include <string.h> +#include "vm_util.h" #include "../kselftest.h" -#ifdef __powerpc64__ -#define PAGE_SIZE (64 << 10) -/* - * This will work with 16M and 2M hugepage size - */ -#define HUGETLB_SIZE (16 << 20) -#elif __aarch64__ -/* - * The default hugepage size for 64k base pagesize - * is 512MB. - */ -#define PAGE_SIZE (64 << 10) -#define HUGETLB_SIZE (512 << 20) -#else -#define PAGE_SIZE (4 << 10) -#define HUGETLB_SIZE (2 << 20) -#endif - /* * The hint addr value is used to allocate addresses * beyond the high address switch boundary. @@ -37,18 +20,8 @@ #define ADDR_MARK_128TB (1UL << 47) #define ADDR_MARK_256TB (1UL << 48) -#define HIGH_ADDR_128TB ((void *) (1UL << 48)) -#define HIGH_ADDR_256TB ((void *) (1UL << 49)) - -#define LOW_ADDR ((void *) (1UL << 30)) - -#ifdef __aarch64__ -#define ADDR_SWITCH_HINT ADDR_MARK_256TB -#define HIGH_ADDR HIGH_ADDR_256TB -#else -#define ADDR_SWITCH_HINT ADDR_MARK_128TB -#define HIGH_ADDR HIGH_ADDR_128TB -#endif +#define HIGH_ADDR_128TB (1UL << 48) +#define HIGH_ADDR_256TB (1UL << 49) struct testcase { void *addr; @@ -59,195 +32,230 @@ struct testcase { unsigned int keep_mapped:1; }; -static struct testcase testcases[] = { - { - /* - * If stack is moved, we could possibly allocate - * this at the requested address. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - /* - * Unless MAP_FIXED is specified, allocation based on hint - * addr is never at requested address or above it, which is - * beyond high address switch boundary in this case. Instead, - * a suitable allocation is found in lower address space. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", - .low_addr_required = 1, - }, - { - /* - * Exact mapping at high address switch boundary, should - * be obtained even without MAP_FIXED as area is free. - */ - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, -}; +static struct testcase *testcases; +static struct testcase *hugetlb_testcases; +static int sz_testcases, sz_hugetlb_testcases; +static unsigned long switch_hint; -static struct testcase hugetlb_testcases[] = { - { - .addr = NULL, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", - }, -}; +/* Initialize testcases inside a function to compute parameters at runtime */ +void testcases_init(void) +{ + unsigned long pagesize = getpagesize(); + unsigned long hugepagesize = default_huge_page_size(); + unsigned long low_addr = (1UL << 30); + unsigned long addr_switch_hint = ADDR_MARK_128TB; + unsigned long high_addr = HIGH_ADDR_128TB; + +#ifdef __aarch64__ + + /* Post LPA2, the lower userspace VA on a 16K pagesize is 47 bits. */ + if (pagesize != (16UL << 10)) { + addr_switch_hint = ADDR_MARK_256TB; + high_addr = HIGH_ADDR_256TB; + } +#endif + + struct testcase t[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, pagesize)", + .low_addr_required = 1, + }, + { + /* + * Unless MAP_FIXED is specified, allocation based on hint + * addr is never at requested address or above it, which is + * beyond high address switch boundary in this case. Instead, + * a suitable allocation is found in lower address space. + */ + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, (2 * pagesize))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at high address switch boundary, should + * be obtained even without MAP_FIXED as area is free. + */ + .addr = ((void *)(addr_switch_hint)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint, pagesize)", + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = (void *)low_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(low_addr)", + .low_addr_required = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr)", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr) again", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(high_addr, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, pagesize)", + .low_addr_required = 1, + }, + { + .addr = (void *)(addr_switch_hint - pagesize), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, 2 * pagesize)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint - pagesize / 2), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize/2 , 2 * pagesize)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(addr_switch_hint)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint, pagesize)", + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)", + }, + }; + + struct testcase ht[] = { + { + .addr = NULL, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = (void *)low_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(low_addr, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(high_addr, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(addr_switch_hint - pagesize), + .size = 2 * hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint , 2*hugepagesize, MAP_FIXED | MAP_HUGETLB)", + }, + }; + + testcases = malloc(sizeof(t)); + hugetlb_testcases = malloc(sizeof(ht)); + + /* Copy into global arrays */ + memcpy(testcases, t, sizeof(t)); + memcpy(hugetlb_testcases, ht, sizeof(ht)); + + sz_testcases = ARRAY_SIZE(t); + sz_hugetlb_testcases = ARRAY_SIZE(ht); + switch_hint = addr_switch_hint; +} static int run_test(struct testcase *test, int count) { @@ -267,7 +275,7 @@ static int run_test(struct testcase *test, int count) continue; } - if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + if (t->low_addr_required && p >= (void *)(switch_hint)) { printf("FAILED\n"); ret = KSFT_FAIL; } else { @@ -292,7 +300,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return getpagesize() == PAGE_SIZE; + return 1; #else return 0; #endif @@ -305,8 +313,10 @@ int main(int argc, char **argv) if (!supported_arch()) return KSFT_SKIP; - ret = run_test(testcases, ARRAY_SIZE(testcases)); + testcases_init(); + + ret = run_test(testcases, sz_testcases); if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) - ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + ret = run_test(hugetlb_testcases, sz_hugetlb_testcases); return ret; } diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index a0a75f302904..2c725773cd79 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -57,8 +57,4 @@ check_test_requirements() } check_test_requirements -./va_high_addr_switch - -# In order to run hugetlb testcases, "--run-hugetlb" must be appended -# to the binary. ./va_high_addr_switch --run-hugetlb diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 5c16159d0bcd..fb898850867c 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -323,7 +323,8 @@ void *fake_cont_thread(void *arg) void *cont_thread(void *arg) { char buff[MSG_SIZE]; - int i, priority; + int i; + unsigned int priority; for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i] == pthread_self()) @@ -425,7 +426,8 @@ struct test test2[] = { void *perf_test_thread(void *arg) { char buff[MSG_SIZE]; - int prio_out, prio_in; + int prio_out; + unsigned int prio_in; int i; clockid_t clock; pthread_t *t; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bc3925200637..8eaffd7a641c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../ diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 522d991e310e..bd88b90b902b 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -26,7 +26,7 @@ LIB := $(LIBDIR)/libaotst.a LDLIBS += $(LIB) -pthread LIBDEPS := lib/aolib.h Makefile -CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += -Wall -O2 -g -fno-strict-aliasing CFLAGS += $(KHDR_INCLUDES) CFLAGS += -iquote ./lib/ -I ../../../../include/ diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 47746b0c6acd..7c2a4349170a 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,11 +16,56 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> +#include <linux/ioctl.h> #include "pidfd.h" #include "../clone3/clone3_selftests.h" #include "../kselftest_harness.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_CGROUP_NAMESPACE +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#endif + +#ifndef PIDFD_GET_IPC_NAMESPACE +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#endif + +#ifndef PIDFD_GET_NET_NAMESPACE +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#endif + +#ifndef PIDFD_GET_PID_NAMESPACE +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#endif + +#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#endif + +#ifndef PIDFD_GET_TIME_NAMESPACE +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#endif + +#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#endif + +#ifndef PIDFD_GET_USER_NAMESPACE +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#endif + +#ifndef PIDFD_GET_UTS_NAMESPACE +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#endif + enum { PIDFD_NS_USER, PIDFD_NS_MNT, @@ -31,22 +76,25 @@ enum { PIDFD_NS_CGROUP, PIDFD_NS_PIDCLD, PIDFD_NS_TIME, + PIDFD_NS_TIMECLD, PIDFD_NS_MAX }; const struct ns_info { const char *name; int flag; + unsigned int pidfd_ioctl; } ns_info[] = { - [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, - [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, - [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, - [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, - [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, - [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, - [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, - [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, - [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, + [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, }, + [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, }, + [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, }, + [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, }, + [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, }, + [PIDFD_NS_NET] = { "net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, }, + [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, }, + [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, }, + [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE, }, + [PIDFD_NS_TIMECLD] = { "time_for_children", 0, PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE, }, }; FIXTURE(current_nsset) @@ -54,6 +102,7 @@ FIXTURE(current_nsset) pid_t pid; int pidfd; int nsfds[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds[PIDFD_NS_MAX]; pid_t child_pid_exited; int child_pidfd_exited; @@ -61,10 +110,12 @@ FIXTURE(current_nsset) pid_t child_pid1; int child_pidfd1; int child_nsfds1[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds1[PIDFD_NS_MAX]; pid_t child_pid2; int child_pidfd2; int child_nsfds2[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; }; static int sys_waitid(int which, pid_t pid, int options) @@ -128,9 +179,12 @@ FIXTURE_SETUP(current_nsset) char c; for (i = 0; i < PIDFD_NS_MAX; i++) { - self->nsfds[i] = -EBADF; - self->child_nsfds1[i] = -EBADF; - self->child_nsfds2[i] = -EBADF; + self->nsfds[i] = -EBADF; + self->child_nsfds1[i] = -EBADF; + self->child_nsfds2[i] = -EBADF; + self->child_pidfd_derived_nsfds[i] = -EBADF; + self->child_pidfd_derived_nsfds1[i] = -EBADF; + self->child_pidfd_derived_nsfds2[i] = -EBADF; } proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); @@ -139,6 +193,11 @@ FIXTURE_SETUP(current_nsset) } self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + EXPECT_GT(self->pidfd, 0) { + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + } + for (i = 0; i < PIDFD_NS_MAX; i++) { const struct ns_info *info = &ns_info[i]; self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); @@ -148,20 +207,27 @@ FIXTURE_SETUP(current_nsset) info->name, self->pid); } } - } - self->pidfd = sys_pidfd_open(self->pid, 0); - EXPECT_GT(self->pidfd, 0) { - TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + self->child_pidfd_derived_nsfds[i] = ioctl(self->pidfd, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->pid); + } + } } /* Create task that exits right away. */ - self->child_pid_exited = create_child(&self->child_pidfd_exited, - CLONE_NEWUSER | CLONE_NEWNET); + self->child_pid_exited = create_child(&self->child_pidfd_exited, 0); EXPECT_GE(self->child_pid_exited, 0); - if (self->child_pid_exited == 0) + if (self->child_pid_exited == 0) { + if (self->nsfds[PIDFD_NS_USER] >= 0 && unshare(CLONE_NEWUSER) < 0) + _exit(EXIT_FAILURE); + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); + } ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); @@ -174,18 +240,43 @@ FIXTURE_SETUP(current_nsset) EXPECT_EQ(ret, 0); /* Create tasks that will be stopped. */ - self->child_pid1 = create_child(&self->child_pidfd1, - CLONE_NEWUSER | CLONE_NEWNS | - CLONE_NEWCGROUP | CLONE_NEWIPC | - CLONE_NEWUTS | CLONE_NEWPID | - CLONE_NEWNET); + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_USER] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + else + self->child_pid1 = create_child(&self->child_pidfd1, 0); EXPECT_GE(self->child_pid1, 0); if (self->child_pid1 == 0) { close(ipc_sockets[0]); - if (!switch_timens()) + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } if (write_nointr(ipc_sockets[1], "1", 1) < 0) _exit(EXIT_FAILURE); @@ -203,18 +294,43 @@ FIXTURE_SETUP(current_nsset) ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); EXPECT_EQ(ret, 0); - self->child_pid2 = create_child(&self->child_pidfd2, - CLONE_NEWUSER | CLONE_NEWNS | - CLONE_NEWCGROUP | CLONE_NEWIPC | - CLONE_NEWUTS | CLONE_NEWPID | - CLONE_NEWNET); + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_USER] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER); + else + self->child_pid2 = create_child(&self->child_pidfd2, 0); EXPECT_GE(self->child_pid2, 0); if (self->child_pid2 == 0) { close(ipc_sockets[0]); - if (!switch_timens()) + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } if (write_nointr(ipc_sockets[1], "1", 1) < 0) _exit(EXIT_FAILURE); @@ -267,6 +383,22 @@ FIXTURE_SETUP(current_nsset) info->name, self->child_pid1); } } + + self->child_pidfd_derived_nsfds1[i] = ioctl(self->child_pidfd1, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds1[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->child_pid1); + } + } + + self->child_pidfd_derived_nsfds2[i] = ioctl(self->child_pidfd2, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds2[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->child_pid2); + } + } } close(proc_fd); @@ -288,6 +420,12 @@ FIXTURE_TEARDOWN(current_nsset) close(self->child_nsfds1[i]); if (self->child_nsfds2[i] >= 0) close(self->child_nsfds2[i]); + if (self->child_pidfd_derived_nsfds[i] >= 0) + close(self->child_pidfd_derived_nsfds[i]); + if (self->child_pidfd_derived_nsfds1[i] >= 0) + close(self->child_pidfd_derived_nsfds1[i]); + if (self->child_pidfd_derived_nsfds2[i] >= 0) + close(self->child_pidfd_derived_nsfds2[i]); } if (self->child_pidfd1 >= 0) @@ -446,6 +584,42 @@ TEST_F(current_nsset, nsfd_incremental_setns) } } +TEST_F(current_nsset, pidfd_derived_nsfd_incremental_setns) +{ + int i; + pid_t pid; + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + int nsfd; + + if (self->child_pidfd_derived_nsfds1[i] < 0) + continue; + + if (info->flag) { + ASSERT_EQ(setns(self->child_pidfd_derived_nsfds1[i], info->flag), 0) { + TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_pidfd_derived_nsfds1[i]); + } + } + + /* Verify that we have changed to the correct namespaces. */ + if (info->flag == CLONE_NEWPID) + nsfd = self->child_pidfd_derived_nsfds[i]; + else + nsfd = self->child_pidfd_derived_nsfds1[i]; + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { + TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_pidfd_derived_nsfds1[i]); + } + TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, self->child_pidfd_derived_nsfds1[i]); + } +} + TEST_F(current_nsset, pidfd_one_shot_setns) { unsigned flags = 0; @@ -542,6 +716,28 @@ TEST_F(current_nsset, no_foul_play) info->name, self->child_pid2, self->child_nsfds2[i]); } + + /* + * Can't setns to a user namespace outside of our hierarchy since we + * don't have caps in there and didn't create it. That means that under + * no circumstances should we be able to setns to any of the other + * ones since they aren't owned by our user namespace. + */ + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + + if (self->child_pidfd_derived_nsfds2[i] < 0 || !info->flag) + continue; + + ASSERT_NE(setns(self->child_pidfd_derived_nsfds2[i], info->flag), 0) { + TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_pidfd_derived_nsfds2[i]); + } + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_pidfd_derived_nsfds2[i]); + } } TEST(setns_einval) diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index a156ac5dd2c6..973968f45bba 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -2,6 +2,7 @@ /fd-001-lookup /fd-002-posix-eq /fd-003-kthread +/proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 /proc-multiple-procfs @@ -9,6 +10,7 @@ /proc-pid-vm /proc-self-map-files-001 /proc-self-map-files-002 +/proc-self-isnt-kthread /proc-self-syscall /proc-self-wchan /proc-subset-pid diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index cd95369254c0..b12921b9794b 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -1,17 +1,19 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function -CFLAGS += -D_GNU_SOURCE +CFLAGS += $(TOOLS_INCLUDES) LDFLAGS += -pthread TEST_GEN_PROGS := TEST_GEN_PROGS += fd-001-lookup TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread +TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-isnt-kthread TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan TEST_GEN_PROGS += proc-subset-pid diff --git a/tools/testing/selftests/proc/proc-2-is-kthread.c b/tools/testing/selftests/proc/proc-2-is-kthread.c new file mode 100644 index 000000000000..f13668fb482e --- /dev/null +++ b/tools/testing/selftests/proc/proc-2-is-kthread.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that kernel thread is reported as such. */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +int main(void) +{ + /* + * The following solutions don't really work: + * + * 1) jit kernel module which creates kernel thread: + * test becomes arch-specific, + * problems with mandatory module signing, + * problems with lockdown mode, + * doesn't work with CONFIG_MODULES=n at all, + * kthread creation API is formally unstable internal kernel API, + * need a mechanism to report test kernel thread's PID back, + * + * 2) ksoftirqd/0 and kswapd0 look like stable enough kernel threads, + * but their PIDs are unstable. + * + * Check against kthreadd which always seem to exist under pid 2. + */ + int fd = open("/proc/2/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + assert(strstr(buf, "Kthread:\t1\n")); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index 56198d4ca2bf..b3f898aab4ab 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -381,9 +381,6 @@ static int test_proc_pid_statm(pid_t pid) assert(rv >= 0); assert(rv <= sizeof(buf)); - if (0) { - write(1, buf, rv); - } const char *p = buf; const char *const end = p + rv; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index cacbd2a4aec9..d04685771952 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -45,6 +45,7 @@ #include <linux/kdev_t.h> #include <sys/time.h> #include <sys/resource.h> +#include <linux/fs.h> #include "../kselftest.h" @@ -492,6 +493,91 @@ int main(void) assert(buf[13] == '\n'); } + /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */ + { + char path_buf[256], exp_path_buf[256]; + struct procmap_query q; + int fd, err; + + snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid); + fd = open(path_buf, O_RDONLY); + if (fd == -1) + return 1; + + /* CASE 1: exact MATCH at VADDR */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR; + q.query_flags = 0; + q.vma_name_addr = (__u64)(unsigned long)path_buf; + q.vma_name_size = sizeof(path_buf); + + err = ioctl(fd, PROCMAP_QUERY, &q); + assert(err == 0); + + assert(q.query_addr == VADDR); + assert(q.query_flags == 0); + + assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE)); + assert(q.vma_start == VADDR); + assert(q.vma_end == VADDR + PAGE_SIZE); + assert(q.vma_page_size == PAGE_SIZE); + + assert(q.vma_offset == 0); + assert(q.inode == st.st_ino); + assert(q.dev_major == MAJOR(st.st_dev)); + assert(q.dev_minor == MINOR(st.st_dev)); + + snprintf(exp_path_buf, sizeof(exp_path_buf), + "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino); + assert(q.vma_name_size == strlen(exp_path_buf) + 1); + assert(strcmp(path_buf, exp_path_buf) == 0); + + /* CASE 2: NO MATCH at VADDR-1 */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR - 1; + q.query_flags = 0; /* exact match */ + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + + /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR - 1; + q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; + + err = ioctl(fd, PROCMAP_QUERY, &q); + assert(err == 0); + + assert(q.query_addr == VADDR - 1); + assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA); + assert(q.vma_start == VADDR); + assert(q.vma_end == VADDR + PAGE_SIZE); + + /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */ + q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + + /* CASE 5: NO MATCH WRITABLE at VADDR */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR; + q.query_flags = PROCMAP_QUERY_VMA_WRITABLE; + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + } + return 0; } #else diff --git a/tools/testing/selftests/proc/proc-self-isnt-kthread.c b/tools/testing/selftests/proc/proc-self-isnt-kthread.c new file mode 100644 index 000000000000..e01f4e0a91b4 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-isnt-kthread.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that userspace program is not kernel thread. */ +#undef NDEBUG +#include <assert.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +int main(void) +{ + int fd = open("/proc/self/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + /* This test is very much not kernel thread. */ + assert(strstr(buf, "Kthread:\t0\n")); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 021863f86053..f408bd6bfc3d 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := resctrl_tests diff --git a/tools/testing/selftests/ring-buffer/Makefile b/tools/testing/selftests/ring-buffer/Makefile index 627c5fa6d1ab..23605782639e 100644 --- a/tools/testing/selftests/ring-buffer/Makefile +++ b/tools/testing/selftests/ring-buffer/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -Wl,-no-as-needed -Wall CFLAGS += $(KHDR_INCLUDES) -CFLAGS += -D_GNU_SOURCE TEST_GEN_PROGS = map_test diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile index c333263f2b27..4664ed79e20b 100644 --- a/tools/testing/selftests/riscv/mm/Makefile +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -3,7 +3,7 @@ # Originally tools/testing/arm64/abi/Makefile # Additional include paths needed by kselftest.h and local headers -CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. TEST_GEN_FILES := mmap_default mmap_bottomup diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 27668fb3b6d0..895177f6bf4c 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -88,16 +88,16 @@ int main(void) return -2; } - if (!(pair.value & RISCV_HWPROBE_IMA_V)) { + if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { rc = prctl(PR_RISCV_V_GET_CONTROL); if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); return -3; } rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); return -4; } diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 867f88ce2570..03b5e13b872b 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -12,7 +12,7 @@ OBJCOPY := $(CROSS_COMPILE)objcopy endif INCLUDES := -I$(top_srcdir)/tools/include -HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC $(CFLAGS) HOST_LDFLAGS := -z noexecstack -lcrypto ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \ -fno-stack-protector -mrdrnd $(INCLUDES) diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h index ea9bdf3a90b1..09da8f1011ce 100644 --- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h +++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h @@ -8,7 +8,7 @@ register unsigned long sp asm("sp"); register unsigned long sp asm("esp"); #elif __loongarch64 register unsigned long sp asm("$sp"); -#elif __ppc__ +#elif __powerpc__ register unsigned long sp asm("r1"); #elif __s390x__ register unsigned long sp asm("%15"); diff --git a/tools/testing/selftests/tmpfs/Makefile b/tools/testing/selftests/tmpfs/Makefile index aa11ccc92e5b..3be931e1193f 100644 --- a/tools/testing/selftests/tmpfs/Makefile +++ b/tools/testing/selftests/tmpfs/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -CFLAGS += -D_GNU_SOURCE TEST_GEN_PROGS := TEST_GEN_PROGS += bug-link-o-tmpfile diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index a8dc51af5a9c..30d5c8f0e5c7 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -6,3 +6,5 @@ vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 +vdso_test_getrandom +vdso_test_chacha diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 98d8ba2afa00..3de8e7e052ae 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu @@ -10,6 +11,12 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness +ifeq ($(uname_M),x86_64) +TEST_GEN_PROGS += vdso_test_getrandom +ifneq ($(SODIUM),) +TEST_GEN_PROGS += vdso_test_chacha +endif +endif CFLAGS := -std=gnu99 @@ -28,3 +35,14 @@ $(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind- $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl + +$(OUTPUT)/vdso_test_getrandom: parse_vdso.c +$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ + -isystem $(top_srcdir)/include/uapi + +$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/arch/$(ARCH)/entry/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ + -isystem $(top_srcdir)/arch/$(ARCH)/include \ + -isystem $(top_srcdir)/include \ + -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ + -Wa,--noexecstack $(SODIUM) diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c new file mode 100644 index 000000000000..e38f44e5f803 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <sodium/crypto_stream_chacha20.h> +#include <sys/random.h> +#include <string.h> +#include <stdint.h> +#include "../kselftest.h" + +extern void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint8_t *key, uint32_t *counter, size_t nblocks); + +int main(int argc, char *argv[]) +{ + enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 }; + static const uint8_t nonce[8] = { 0 }; + uint32_t counter[2]; + uint8_t key[32]; + uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS]; + + ksft_print_header(); + ksft_set_plan(1); + + for (unsigned int trial = 0; trial < TRIALS; ++trial) { + if (getrandom(key, sizeof(key), 0) != sizeof(key)) { + printf("getrandom() failed!\n"); + return KSFT_SKIP; + } + crypto_stream_chacha20(output1, sizeof(output1), nonce, key); + for (unsigned int split = 0; split < BLOCKS; ++split) { + memset(output2, 'X', sizeof(output2)); + memset(counter, 0, sizeof(counter)); + if (split) + __arch_chacha20_blocks_nostack(output2, key, counter, split); + __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split); + if (memcmp(output1, output2, sizeof(output1))) + return KSFT_FAIL; + } + } + ksft_test_result_pass("chacha: PASS\n"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c new file mode 100644 index 000000000000..05122425a873 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <assert.h> +#include <pthread.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <signal.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/random.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <linux/random.h> + +#include "../kselftest.h" +#include "parse_vdso.h" + +#ifndef timespecsub +#define timespecsub(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ + if ((vsp)->tv_nsec < 0) { \ + (vsp)->tv_sec--; \ + (vsp)->tv_nsec += 1000000000L; \ + } \ + } while (0) +#endif + +static struct { + pthread_mutex_t lock; + void **states; + size_t len, cap; +} grnd_allocator = { + .lock = PTHREAD_MUTEX_INITIALIZER +}; + +static struct { + ssize_t(*fn)(void *, size_t, unsigned long, void *, size_t); + pthread_key_t key; + pthread_once_t initialized; + struct vgetrandom_opaque_params params; +} grnd_ctx = { + .initialized = PTHREAD_ONCE_INIT +}; + +static void *vgetrandom_get_state(void) +{ + void *state = NULL; + + pthread_mutex_lock(&grnd_allocator.lock); + if (!grnd_allocator.len) { + size_t page_size = getpagesize(); + size_t new_cap; + size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */ + void *new_block, *new_states; + + alloc_size = (num * grnd_ctx.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); + num = (page_size / grnd_ctx.params.size_of_opaque_state) * (alloc_size / page_size); + new_block = mmap(0, alloc_size, grnd_ctx.params.mmap_prot, grnd_ctx.params.mmap_flags, -1, 0); + if (new_block == MAP_FAILED) + goto out; + + new_cap = grnd_allocator.cap + num; + new_states = reallocarray(grnd_allocator.states, new_cap, sizeof(*grnd_allocator.states)); + if (!new_states) + goto unmap; + grnd_allocator.cap = new_cap; + grnd_allocator.states = new_states; + + for (size_t i = 0; i < num; ++i) { + if (((uintptr_t)new_block & (page_size - 1)) + grnd_ctx.params.size_of_opaque_state > page_size) + new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1))); + grnd_allocator.states[i] = new_block; + new_block += grnd_ctx.params.size_of_opaque_state; + } + grnd_allocator.len = num; + goto success; + + unmap: + munmap(new_block, alloc_size); + goto out; + } +success: + state = grnd_allocator.states[--grnd_allocator.len]; + +out: + pthread_mutex_unlock(&grnd_allocator.lock); + return state; +} + +static void vgetrandom_put_state(void *state) +{ + if (!state) + return; + pthread_mutex_lock(&grnd_allocator.lock); + grnd_allocator.states[grnd_allocator.len++] = state; + pthread_mutex_unlock(&grnd_allocator.lock); +} + +static void vgetrandom_init(void) +{ + if (pthread_key_create(&grnd_ctx.key, vgetrandom_put_state) != 0) + return; + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + exit(KSFT_SKIP); + } + vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); + grnd_ctx.fn = (__typeof__(grnd_ctx.fn))vdso_sym("LINUX_2.6", "__vdso_getrandom"); + if (!grnd_ctx.fn) { + printf("__vdso_getrandom is missing!\n"); + exit(KSFT_FAIL); + } + if (grnd_ctx.fn(NULL, 0, 0, &grnd_ctx.params, ~0UL) != 0) { + printf("failed to fetch vgetrandom params!\n"); + exit(KSFT_FAIL); + } +} + +static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) +{ + void *state; + + pthread_once(&grnd_ctx.initialized, vgetrandom_init); + state = pthread_getspecific(grnd_ctx.key); + if (!state) { + state = vgetrandom_get_state(); + if (pthread_setspecific(grnd_ctx.key, state) != 0) { + vgetrandom_put_state(state); + state = NULL; + } + if (!state) { + printf("vgetrandom_get_state failed!\n"); + exit(KSFT_FAIL); + } + } + return grnd_ctx.fn(buf, len, flags, state, grnd_ctx.params.size_of_opaque_state); +} + +enum { TRIALS = 25000000, THREADS = 256 }; + +static void *test_vdso_getrandom(void *) +{ + for (size_t i = 0; i < TRIALS; ++i) { + unsigned int val; + ssize_t ret = vgetrandom(&val, sizeof(val), 0); + assert(ret == sizeof(val)); + } + return NULL; +} + +static void *test_libc_getrandom(void *) +{ + for (size_t i = 0; i < TRIALS; ++i) { + unsigned int val; + ssize_t ret = getrandom(&val, sizeof(val), 0); + assert(ret == sizeof(val)); + } + return NULL; +} + +static void *test_syscall_getrandom(void *) +{ + for (size_t i = 0; i < TRIALS; ++i) { + unsigned int val; + ssize_t ret = syscall(__NR_getrandom, &val, sizeof(val), 0); + assert(ret == sizeof(val)); + } + return NULL; +} + +static void bench_single(void) +{ + struct timespec start, end, diff; + + clock_gettime(CLOCK_MONOTONIC, &start); + test_vdso_getrandom(NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" vdso: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + test_libc_getrandom(NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" libc: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + test_syscall_getrandom(NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf("syscall: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec); +} + +static void bench_multi(void) +{ + struct timespec start, end, diff; + pthread_t threads[THREADS]; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (size_t i = 0; i < THREADS; ++i) + assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); + for (size_t i = 0; i < THREADS; ++i) + pthread_join(threads[i], NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" vdso: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (size_t i = 0; i < THREADS; ++i) + assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); + for (size_t i = 0; i < THREADS; ++i) + pthread_join(threads[i], NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" libc: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (size_t i = 0; i < THREADS; ++i) + assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); + for (size_t i = 0; i < THREADS; ++i) + pthread_join(threads[i], NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" syscall: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec); +} + +static void fill(void) +{ + uint8_t weird_size[323929]; + for (;;) + vgetrandom(weird_size, sizeof(weird_size), 0); +} + +static void kselftest(void) +{ + uint8_t weird_size[1263]; + + ksft_print_header(); + ksft_set_plan(1); + + for (size_t i = 0; i < 1000; ++i) { + ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); + if (ret != sizeof(weird_size)) + exit(KSFT_FAIL); + } + + ksft_test_result_pass("getrandom: PASS\n"); + exit(KSFT_PASS); +} + +static void usage(const char *argv0) +{ + fprintf(stderr, "Usage: %s [bench-single|bench-multi|fill]\n", argv0); +} + +int main(int argc, char *argv[]) +{ + if (argc == 1) { + kselftest(); + return 0; + } + + if (argc != 2) { + usage(argv[0]); + return 1; + } + if (!strcmp(argv[1], "bench-single")) + bench_single(); + else if (!strcmp(argv[1], "bench-multi")) + bench_multi(); + else if (!strcmp(argv[1], "fill")) + fill(); + else { + usage(argv[0]); + return 1; + } + return 0; +} diff --git a/tools/tracing/latency/Makefile.config b/tools/tracing/latency/Makefile.config index b25e531a1f95..0fe6b50f029b 100644 --- a/tools/tracing/latency/Makefile.config +++ b/tools/tracing/latency/Makefile.config @@ -3,8 +3,9 @@ STOP_ERROR := define lib_setup - $(eval EXTLIBS += -l$(1)) $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) + $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) + $(eval EXTLIBS += $(shell sh -c "$(PKG_CONFIG) --libs-only-l lib$(1)")) endef $(call feature_check,libtraceevent) diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 0b7ecfb30d19..5f8c286712d4 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -7,7 +7,8 @@ LIBTRACEFS_MIN_VERSION = 1.6 define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) - $(eval EXTLIBS += -l$(1)) + $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) + $(eval EXTLIBS += $(shell sh -c "$(PKG_CONFIG) --libs-only-l lib$(1)")) endef $(call feature_check,libtraceevent) diff --git a/tools/verification/rv/Makefile.config b/tools/verification/rv/Makefile.config index 6d4ba77847b6..066302230eb2 100644 --- a/tools/verification/rv/Makefile.config +++ b/tools/verification/rv/Makefile.config @@ -7,7 +7,8 @@ LIBTRACEFS_MIN_VERSION = 1.3 define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) - $(eval EXTLIBS += -l$(1)) + $(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)")) + $(eval EXTLIBS += $(shell sh -c "$(PKG_CONFIG) --libs-only-l lib$(1)")) endef $(call feature_check,libtraceevent) |