diff options
Diffstat (limited to 'tools')
803 files changed, 25530 insertions, 6821 deletions
diff --git a/tools/arch/arm64/include/asm/brk-imm.h b/tools/arch/arm64/include/asm/brk-imm.h new file mode 100644 index 000000000000..beb42c62b6ac --- /dev/null +++ b/tools/arch/arm64/include/asm/brk-imm.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_BRK_IMM_H +#define __ASM_BRK_IMM_H + +/* + * #imm16 values used for BRK instruction generation + * 0x004: for installing kprobes + * 0x005: for installing uprobes + * 0x006: for kprobe software single-step + * 0x007: for kretprobe return + * Allowed values for kgdb are 0x400 - 0x7ff + * 0x100: for triggering a fault on purpose (reserved) + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps + * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) + * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8) + * 0x8xxx: Control-Flow Integrity traps + */ +#define KPROBES_BRK_IMM 0x004 +#define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 +#define KRETPROBES_BRK_IMM 0x007 +#define FAULT_BRK_IMM 0x100 +#define KGDB_DYN_DBG_BRK_IMM 0x400 +#define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 +#define KASAN_BRK_IMM 0x900 +#define KASAN_BRK_MASK 0x0ff +#define UBSAN_BRK_IMM 0x5500 +#define UBSAN_BRK_MASK 0x00ff + +#define CFI_BRK_IMM_TARGET GENMASK(4, 0) +#define CFI_BRK_IMM_TYPE GENMASK(9, 5) +#define CFI_BRK_IMM_BASE 0x8000 +#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE) + +#endif diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h new file mode 100644 index 000000000000..bd592ca81571 --- /dev/null +++ b/tools/arch/arm64/include/asm/esr.h @@ -0,0 +1,455 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ASM_ESR_H +#define __ASM_ESR_H + +#include <asm/sysreg.h> + +#define ESR_ELx_EC_UNKNOWN UL(0x00) +#define ESR_ELx_EC_WFx UL(0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 UL(0x03) +#define ESR_ELx_EC_CP15_64 UL(0x04) +#define ESR_ELx_EC_CP14_MR UL(0x05) +#define ESR_ELx_EC_CP14_LS UL(0x06) +#define ESR_ELx_EC_FP_ASIMD UL(0x07) +#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 UL(0x0C) +#define ESR_ELx_EC_BTI UL(0x0D) +#define ESR_ELx_EC_ILL UL(0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 UL(0x11) +#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 UL(0x15) +#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 UL(0x18) +#define ESR_ELx_EC_SVE UL(0x19) +#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */ +#define ESR_ELx_EC_SME UL(0x1D) +/* Unallocated EC: 0x1E */ +#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW UL(0x20) +#define ESR_ELx_EC_IABT_CUR UL(0x21) +#define ESR_ELx_EC_PC_ALIGN UL(0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW UL(0x24) +#define ESR_ELx_EC_DABT_CUR UL(0x25) +#define ESR_ELx_EC_SP_ALIGN UL(0x26) +#define ESR_ELx_EC_MOPS UL(0x27) +#define ESR_ELx_EC_FP_EXC32 UL(0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 UL(0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR UL(0x2F) +#define ESR_ELx_EC_BREAKPT_LOW UL(0x30) +#define ESR_ELx_EC_BREAKPT_CUR UL(0x31) +#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32) +#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33) +#define ESR_ELx_EC_WATCHPT_LOW UL(0x34) +#define ESR_ELx_EC_WATCHPT_CUR UL(0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 UL(0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 UL(0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX UL(0x3F) + +#define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_WIDTH (6) +#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) + +#define ESR_ELx_IL_SHIFT (25) +#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) + +/* ISS field definitions shared by different classes */ +#define ESR_ELx_WNR_SHIFT (6) +#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT) + +/* Asynchronous Error Type */ +#define ESR_ELx_IDS_SHIFT (24) +#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT) +#define ESR_ELx_AET_SHIFT (10) +#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT) + +#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT) + +/* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_SET_SHIFT (11) +#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) +#define ESR_ELx_FnV_SHIFT (10) +#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT) +#define ESR_ELx_EA_SHIFT (9) +#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT) +#define ESR_ELx_S1PTW_SHIFT (7) +#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT) + +/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_LEVEL (0x03) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_MTE (0x11) +#define ESR_ELx_FSC_SERROR (0x11) +#define ESR_ELx_FSC_ACCESS (0x08) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) +#define ESR_ELx_FSC_SECC (0x18) +#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) + +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n)) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) + +/* ISS field definitions for Data Aborts */ +#define ESR_ELx_ISV_SHIFT (24) +#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) +#define ESR_ELx_SAS_SHIFT (22) +#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) +#define ESR_ELx_SSE_SHIFT (21) +#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT) +#define ESR_ELx_SRT_SHIFT (16) +#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) +#define ESR_ELx_SF_SHIFT (15) +#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT) +#define ESR_ELx_AR_SHIFT (14) +#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT) +#define ESR_ELx_CM_SHIFT (8) +#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) + +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + +/* ISS field definitions for exceptions taken in to Hyp */ +#define ESR_ELx_FSC_ADDRSZ (0x00) +#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) +#define ESR_ELx_CV (UL(1) << 24) +#define ESR_ELx_COND_SHIFT (20) +#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5) +#define ESR_ELx_WFx_ISS_RV (UL(1) << 2) +#define ESR_ELx_WFx_ISS_TI (UL(3) << 0) +#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) +#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) +#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1) + +#define DISR_EL1_IDS (UL(1) << 24) +/* + * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean + * different things in the future... + */ +#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) + +/* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \ + (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT)) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) + +/* BRK instruction trap from AArch64 state */ +#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff + +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define esr_sys64_to_sysreg(e) \ + sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \ + ESR_ELx_SYS64_ISS_OP0_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +#define esr_cp15_to_sysreg(e) \ + sys_reg(3, \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +/* ISS field definitions for ERET/ERETAA/ERETAB trapping */ +#define ESR_ELx_ERET_ISS_ERET 0x2 +#define ESR_ELx_ERET_ISS_ERETA 0x1 + +/* + * ISS field definitions for floating-point exception traps + * (FP_EXC_32/FP_EXC_64). + * + * (The FPEXC_* constants are used instead for common bits.) + */ + +#define ESR_ELx_FP_EXC_TFV (UL(1) << 23) + +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_ZT_DISABLED 4 + +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +static inline unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + +static inline bool esr_is_data_abort(unsigned long esr) +{ + const unsigned long ec = ESR_ELx_EC(esr); + + return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; +} + +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); +} + +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; +} + +const char *esr_get_class_string(unsigned long esr); +#endif /* __ASSEMBLY */ + +#endif /* __ASM_ESR_H */ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 964df31da975..66736ff04011 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -484,6 +484,12 @@ enum { */ #define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) +/* + * Shutdown caused by a PSCI v1.3 SYSTEM_OFF2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_SHUTDOWN. + */ +#define KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2 (1ULL << 0) + /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 05eaf6db3ad4..60345dd2cba2 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -469,7 +469,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kdsa[16]; /* with MSA9 */ __u8 sortl[32]; /* with STFLE.150 */ __u8 dfltcc[32]; /* with STFLE.151 */ - __u8 reserved[1728]; + __u8 pfcr[16]; /* with STFLE.201 */ + __u8 reserved[1712]; }; #define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 23698d0f4bb4..17b6590748c0 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -215,7 +215,7 @@ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ #define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ #define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */ #define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ @@ -317,6 +317,9 @@ #define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */ +#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */ +#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ @@ -348,6 +351,7 @@ #define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ #define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ #define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */ #define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ @@ -472,7 +476,9 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ /* * BUG word(s) @@ -523,4 +529,5 @@ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index a8debbf2f702..88585c1de416 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -440,6 +440,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) +#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 9b898571b49e..23f488cf1740 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -54,6 +54,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[refs->ref_cnt]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt++; return; @@ -77,6 +78,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[0]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt = 1; refs->has_bpf_cookie = e->has_bpf_cookie; refs->bpf_cookie = e->bpf_cookie; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2ff949ea82fa..e71be67f1d86 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -822,11 +822,18 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, printf("%s:\n", sym_name); } - if (disasm_print_insn(img, lens[i], opcodes, - name, disasm_opt, btf, - prog_linfo, ksyms[i], i, - linum)) - goto exit_free; + if (ksyms) { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum)) + goto exit_free; + } else { + if (disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + NULL, 0, 0, false)) + goto exit_free; + } img += lens[i]; diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index ffd117135094..bca47d136f05 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -30,9 +30,7 @@ endef # FEATURE_TESTS_BASIC := \ backtrace \ - dwarf \ - dwarf_getlocations \ - dwarf_getcfi \ + libdw \ eventfd \ fortify-source \ get_current_dir_name \ @@ -53,6 +51,7 @@ FEATURE_TESTS_BASIC := \ libslang-include-subdir \ libtraceevent \ libtracefs \ + libcpupower \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ @@ -60,7 +59,6 @@ FEATURE_TESTS_BASIC := \ reallocarray \ stackprotector-all \ timerfd \ - libdw-dwarf-unwind \ zlib \ lzma \ get_cpuid \ @@ -120,8 +118,7 @@ ifeq ($(FEATURE_TESTS),all) endif FEATURE_DISPLAY ?= \ - dwarf \ - dwarf_getlocations \ + libdw \ glibc \ libbfd \ libbfd-buildid \ @@ -133,7 +130,6 @@ FEATURE_DISPLAY ?= \ libpython \ libcrypto \ libunwind \ - libdw-dwarf-unwind \ libcapstone \ llvm-perf \ zlib \ @@ -233,7 +229,7 @@ endef # # generates feature value assignment for name, like: -# $(call feature_assign,dwarf) == feature-dwarf=1 +# $(call feature_assign,libdw) == feature-libdw=1 # feature_assign = feature-$(1)=$(feature-$(1)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 1658596188bf..043dfd00fce7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -5,9 +5,7 @@ FILES= \ test-all.bin \ test-backtrace.bin \ test-bionic.bin \ - test-dwarf.bin \ - test-dwarf_getlocations.bin \ - test-dwarf_getcfi.bin \ + test-libdw.bin \ test-eventfd.bin \ test-fortify-source.bin \ test-get_current_dir_name.bin \ @@ -38,6 +36,7 @@ FILES= \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ + test-libcpupower.bin \ test-libtracefs.bin \ test-libcrypto.bin \ test-libunwind.bin \ @@ -52,7 +51,6 @@ FILES= \ test-pthread-barrier.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ - test-libdw-dwarf-unwind.bin \ test-libbabeltrace.bin \ test-libcapstone.bin \ test-compile-32.bin \ @@ -168,9 +166,9 @@ $(OUTPUT)test-libopencsd.bin: $(BUILD) # -lopencsd_c_api -lopencsd provided by # $(FEATURE_CHECK_LDFLAGS-libopencsd) -DWARFLIBS := -ldw +DWLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + DWLIBS += -lelf -lz -llzma -lbz2 -lzstd LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) @@ -179,21 +177,15 @@ ifeq ($(findstring -static,${LDFLAGS}),-static) # Elfutils merged libebl.a into libdw.a starting from version 0.177, # Link libebl.a only if libdw is older than this version. ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) - DWARFLIBS += -lebl + DWLIBS += -lebl endif # Must put -ldl after -lebl for dependency DWARFLIBS += -ldl endif -$(OUTPUT)test-dwarf.bin: - $(BUILD) $(DWARFLIBS) - -$(OUTPUT)test-dwarf_getlocations.bin: - $(BUILD) $(DWARFLIBS) - -$(OUTPUT)test-dwarf_getcfi.bin: - $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-libdw.bin: + $(BUILD) $(DWLIBS) $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf @@ -248,6 +240,9 @@ $(OUTPUT)test-libslang-include-subdir.bin: $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent +$(OUTPUT)test-libcpupower.bin: + $(BUILD) -lcpupower + $(OUTPUT)test-libtracefs.bin: $(BUILD) $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null) -ltracefs @@ -317,9 +312,6 @@ $(OUTPUT)test-backtrace.bin: $(OUTPUT)test-timerfd.bin: $(BUILD) -$(OUTPUT)test-libdw-dwarf-unwind.bin: - $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) - $(OUTPUT)test-libbabeltrace.bin: $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 6f4bf386a3b5..59ef3d7fe6a4 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -38,12 +38,8 @@ # include "test-glibc.c" #undef main -#define main main_test_dwarf -# include "test-dwarf.c" -#undef main - -#define main main_test_dwarf_getlocations -# include "test-dwarf_getlocations.c" +#define main main_test_libdw +# include "test-libdw.c" #undef main #define main main_test_eventfd @@ -98,10 +94,6 @@ # include "test-stackprotector-all.c" #undef main -#define main main_test_libdw_dwarf_unwind -# include "test-libdw-dwarf-unwind.c" -#undef main - #define main main_test_zlib # include "test-zlib.c" #undef main @@ -187,8 +179,7 @@ int main(int argc, char *argv[]) main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); - main_test_dwarf(); - main_test_dwarf_getlocations(); + main_test_libdw(); main_test_eventfd(); main_test_libelf_getphdrnum(); main_test_libelf_gelf_getnote(); @@ -202,7 +193,6 @@ int main(int argc, char *argv[]) main_test_numa_num_possible_cpus(); main_test_timerfd(); main_test_stackprotector_all(); - main_test_libdw_dwarf_unwind(); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); diff --git a/tools/build/feature/test-dwarf.c b/tools/build/feature/test-dwarf.c deleted file mode 100644 index 8d474bd7371b..000000000000 --- a/tools/build/feature/test-dwarf.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <dwarf.h> -#include <elfutils/libdw.h> -#include <elfutils/version.h> - -int main(void) -{ - Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); - - return (long)dbg; -} diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c deleted file mode 100644 index 50e7d7cb7bdf..000000000000 --- a/tools/build/feature/test-dwarf_getcfi.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <elfutils/libdw.h> - -int main(void) -{ - Dwarf *dwarf = NULL; - return dwarf_getcfi(dwarf) == NULL; -} diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c deleted file mode 100644 index 78fb4a1fa68c..000000000000 --- a/tools/build/feature/test-dwarf_getlocations.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdlib.h> -#include <elfutils/libdw.h> - -int main(void) -{ - Dwarf_Addr base, start, end; - Dwarf_Attribute attr; - Dwarf_Op *op; - size_t nops; - ptrdiff_t offset = 0; - return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); -} diff --git a/tools/build/feature/test-libcpupower.c b/tools/build/feature/test-libcpupower.c new file mode 100644 index 000000000000..a346aa332a71 --- /dev/null +++ b/tools/build/feature/test-libcpupower.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <cpuidle.h> + +int main(void) +{ + int rv = cpuidle_state_count(0); + return rv; +} diff --git a/tools/build/feature/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c deleted file mode 100644 index ed03d9505609..000000000000 --- a/tools/build/feature/test-libdw-dwarf-unwind.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <elfutils/libdwfl.h> - -int main(void) -{ - /* - * This function is guarded via: __nonnull_attribute__ (1, 2). - * Passing '1' as arguments value. This code is never executed, - * only compiled. - */ - dwfl_thread_getframes((void *) 1, (void *) 1, NULL); - return 0; -} diff --git a/tools/build/feature/test-libdw.c b/tools/build/feature/test-libdw.c new file mode 100644 index 000000000000..2fb59479ab77 --- /dev/null +++ b/tools/build/feature/test-libdw.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdlib.h> +#include <dwarf.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <elfutils/version.h> + +int test_libdw(void) +{ + Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); + + return (long)dbg; +} + +int test_libdw_unwind(void) +{ + /* + * This function is guarded via: __nonnull_attribute__ (1, 2). + * Passing '1' as arguments value. This code is never executed, + * only compiled. + */ + dwfl_thread_getframes((void *) 1, (void *) 1, NULL); + return 0; +} + +int test_libdw_getlocations(void) +{ + Dwarf_Addr base, start, end; + Dwarf_Attribute attr; + Dwarf_Op *op; + size_t nops; + ptrdiff_t offset = 0; + + return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); +} + +int test_libdw_getcfi(void) +{ + Dwarf *dwarf = NULL; + + return dwarf_getcfi(dwarf) == NULL; +} + +int test_elfutils(void) +{ + Dwarf_CFI *cfi = NULL; + + dwarf_cfi_end(cfi); + return 0; +} + +int main(void) +{ + return test_libdw() + test_libdw_unwind() + test_libdw_getlocations() + + test_libdw_getcfi() + test_elfutils(); +} diff --git a/tools/build/feature/test-libtraceevent.c b/tools/build/feature/test-libtraceevent.c index 416b11ffd4b4..804ad80dbbd9 100644 --- a/tools/build/feature/test-libtraceevent.c +++ b/tools/build/feature/test-libtraceevent.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <traceevent/trace-seq.h> +#include <trace-seq.h> int main(void) { diff --git a/tools/firewire/decode-fcp.c b/tools/firewire/decode-fcp.c index b67ebc88434d..f115a3be8d1e 100644 --- a/tools/firewire/decode-fcp.c +++ b/tools/firewire/decode-fcp.c @@ -160,7 +160,7 @@ decode_avc(struct link_transaction *t) name = info->name; } - printf("av/c %s, subunit_type=%s, subunit_id=%d, opcode=%s", + printf("av/c %s, subunit_type=%s, subunit_id=%u, opcode=%s", ctype_names[frame->ctype], subunit_type_names[frame->subunit_type], frame->subunit_id, name); diff --git a/tools/firewire/nosy-dump.c b/tools/firewire/nosy-dump.c index 156e0356e814..9a906de3a9ef 100644 --- a/tools/firewire/nosy-dump.c +++ b/tools/firewire/nosy-dump.c @@ -771,7 +771,7 @@ print_packet(uint32_t *data, size_t length) if (pp->phy_config.set_root) printf(" set_root_id=%02x", pp->phy_config.root_id); if (pp->phy_config.set_gap_count) - printf(" set_gap_count=%d", pp->phy_config.gap_count); + printf(" set_gap_count=%u", pp->phy_config.gap_count); } break; @@ -781,13 +781,13 @@ print_packet(uint32_t *data, size_t length) case PHY_PACKET_SELF_ID: if (pp->self_id.extended) { - printf("extended self id: phy_id=%02x, seq=%d", + printf("extended self id: phy_id=%02x, seq=%u", pp->ext_self_id.phy_id, pp->ext_self_id.sequence); } else { static const char * const speed_names[] = { "S100", "S200", "S400", "BETA" }; - printf("self id: phy_id=%02x, link %s, gap_count=%d, speed=%s%s%s", + printf("self id: phy_id=%02x, link %s, gap_count=%u speed=%s%s%s", pp->self_id.phy_id, (pp->self_id.link_active ? "active" : "not active"), pp->self_id.gap_count, diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore new file mode 100644 index 000000000000..0c5bc15d602f --- /dev/null +++ b/tools/hv/.gitignore @@ -0,0 +1,3 @@ +hv_fcopy_uio_daemon +hv_kvp_daemon +hv_vss_daemon diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 7a00f3066a98..0198321d14a2 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,8 +35,6 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define MAX_FOLDER_NAME 15 -#define MAX_PATH_LEN 15 #define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" #define FCOPY_VER_COUNT 1 @@ -51,7 +49,7 @@ static const int fw_versions[] = { #define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ -unsigned char desc[HV_RING_SIZE]; +static unsigned char desc[HV_RING_SIZE]; static int target_fd; static char target_fname[PATH_MAX]; @@ -409,8 +407,8 @@ int main(int argc, char *argv[]) struct vmbus_br txbr, rxbr; void *ring; uint32_t len = HV_RING_SIZE; - char uio_name[MAX_FOLDER_NAME] = {0}; - char uio_dev_path[MAX_PATH_LEN] = {0}; + char uio_name[NAME_MAX] = {0}; + char uio_dev_path[PATH_MAX] = {0}; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -468,8 +466,10 @@ int main(int argc, char *argv[]) */ ret = pread(fcopy_fd, &tmp, sizeof(int), 0); if (ret < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - continue; + goto close; } len = HV_RING_SIZE; diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh index 058c17b46ffc..268521234d4b 100755 --- a/tools/hv/hv_get_dns_info.sh +++ b/tools/hv/hv_get_dns_info.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh # This example script parses /etc/resolv.conf to retrive DNS information. # In the interest of keeping the KVP daemon code free of distro specific @@ -10,4 +10,4 @@ # this script can be based on the Network Manager APIs for retrieving DNS # entries. -cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }' +exec awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index ae57bf69ad4a..04ba035d67e9 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -725,7 +725,7 @@ static void kvp_get_ipconfig_info(char *if_name, * . */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s", "hv_get_dns_info"); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dns_info", if_name); /* * Execute the command to gather DNS info. @@ -742,7 +742,7 @@ static void kvp_get_ipconfig_info(char *if_name, * Enabled: DHCP enabled. */ - sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name); + sprintf(cmd, "exec %s %s", KVP_SCRIPTS_PATH "hv_get_dhcp_info", if_name); file = popen(cmd, "r"); if (file == NULL) @@ -1606,8 +1606,9 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) * invoke the external script to do its magic. */ - str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s %s", - "hv_set_ifconfig", if_filename, nm_filename); + str_len = snprintf(cmd, sizeof(cmd), "exec %s %s %s", + KVP_SCRIPTS_PATH "hv_set_ifconfig", + if_filename, nm_filename); /* * This is a little overcautious, but it's necessary to suppress some * false warnings from gcc 8.0.1. diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh index 440a91b35823..2f8baed2b8f7 100755 --- a/tools/hv/hv_set_ifconfig.sh +++ b/tools/hv/hv_set_ifconfig.sh @@ -81,7 +81,7 @@ echo "ONBOOT=yes" >> $1 cp $1 /etc/sysconfig/network-scripts/ -chmod 600 $2 +umask 0177 interface=$(echo $2 | awk -F - '{ print $2 }') filename="${2##*/}" diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 8073c9e4fe46..cccf62ea2b8f 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -63,6 +63,7 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_DELTA_VELOCITY] = "deltavelocity", [IIO_COLORTEMP] = "colortemp", [IIO_CHROMATICITY] = "chromaticity", + [IIO_ATTENTION] = "attention", }; static const char * const iio_ev_type_text[] = { @@ -183,6 +184,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_DELTA_VELOCITY: case IIO_COLORTEMP: case IIO_CHROMATICITY: + case IIO_ATTENTION: break; default: return false; @@ -449,6 +451,7 @@ error_free_chrdev_name: enable_events(dev_dir_name, 0); free(chrdev_name); + free(dev_dir_name); return ret; } diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 62e7c901ac28..e20f98e14e81 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TOOLS_LINUX_COMPILER_H_ -#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." +#error "Please do not include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." #endif /* diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 453a4f4ef39d..df5d9fa84dba 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -54,4 +54,16 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 6ce1f1ceb432..1ea2c4c33b86 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -79,6 +79,9 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */ +#define MADV_GUARD_REMOVE 103 /* unguard range */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 406f7718f9ad..51d2556af54a 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -19,4 +19,8 @@ #define MCL_FUTURE 2 /* lock all future mappings */ #define MCL_ONFAULT 4 /* lock all pages that are faulted in */ +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack marker in the shadow stack */ + + #endif /* __ASM_GENERIC_MMAN_H */ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..ffff554a5230 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -126,6 +126,8 @@ #define SCM_TS_OPT_ID 78 +#define SO_RCVPRIORITY 79 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 5bf6148cac2b..88dc393c2bca 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -841,8 +841,17 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_setxattrat 463 +__SYSCALL(__NR_setxattrat, sys_setxattrat) +#define __NR_getxattrat 464 +__SYSCALL(__NR_getxattrat, sys_getxattrat) +#define __NR_listxattrat 465 +__SYSCALL(__NR_listxattrat, sys_listxattrat) +#define __NR_removexattrat 466 +__SYSCALL(__NR_removexattrat, sys_removexattrat) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 467 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 16122819edfe..7fba37b94401 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1024,6 +1024,13 @@ struct drm_crtc_queue_sequence { __u64 user_data; /* user data passed to event */ }; +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + + #if defined(__cplusplus) } #endif @@ -1288,6 +1295,16 @@ extern "C" { */ #define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8516c1ccd57a..7e46ca4cd31b 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 637efc055145..502ea63b5d2e 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1158,7 +1158,15 @@ enum kvm_device_type { #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_RISCV_AIA, #define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA + KVM_DEV_TYPE_LOONGARCH_IPI, +#define KVM_DEV_TYPE_LOONGARCH_IPI KVM_DEV_TYPE_LOONGARCH_IPI + KVM_DEV_TYPE_LOONGARCH_EIOINTC, +#define KVM_DEV_TYPE_LOONGARCH_EIOINTC KVM_DEV_TYPE_LOONGARCH_EIOINTC + KVM_DEV_TYPE_LOONGARCH_PCHPIC, +#define KVM_DEV_TYPE_LOONGARCH_PCHPIC KVM_DEV_TYPE_LOONGARCH_PCHPIC + KVM_DEV_TYPE_MAX, + }; struct kvm_vfio_spapr_tce { diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 4842c36fdf80..0524d541d4e3 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -511,7 +511,16 @@ struct perf_event_attr { __u16 sample_max_stack; __u16 __reserved_2; __u32 aux_sample_size; - __u32 __reserved_3; + + union { + __u32 aux_action; + struct { + __u32 aux_start_paused : 1, /* start AUX area tracing paused */ + aux_pause : 1, /* on overflow, pause AUX area tracing */ + aux_resume : 1, /* on overflow, resume AUX area tracing */ + __reserved_3 : 29; + }; + }; /* * User provided data if sigtrap=1, passed back to user via diff --git a/tools/include/uapi/linux/stddef.h b/tools/include/uapi/linux/stddef.h index bb6ea517efb5..c53cde425406 100644 --- a/tools/include/uapi/linux/stddef.h +++ b/tools/include/uapi/linux/stddef.h @@ -8,6 +8,13 @@ #define __always_inline __inline__ #endif +/* Not all C++ standards support type declarations inside an anonymous union */ +#ifndef __cplusplus +#define __struct_group_tag(TAG) TAG +#else +#define __struct_group_tag(TAG) +#endif + /** * __struct_group() - Create a mirrored named and anonyomous struct * @@ -20,14 +27,14 @@ * and size: one anonymous and one named. The former's members can be used * normally without sub-struct naming, and the latter can be used to * reason about the start, end, and size of the group of struct members. - * The named struct can also be explicitly tagged for layer reuse, as well - * as both having struct attributes appended. + * The named struct can also be explicitly tagged for layer reuse (C only), + * as well as both having struct attributes appended. */ #define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \ union { \ struct { MEMBERS } ATTRS; \ - struct TAG { MEMBERS } ATTRS NAME; \ - } + struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \ + } ATTRS /** * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index d3eb04d1bc89..1731996b2c32 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -189,6 +189,7 @@ static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_ err_out: free(line); *line_out = NULL; + *line_len_out = 0; return -ENOMEM; } diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 69affa251fa7..bb99e493dcd1 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile index 972754082a85..573ca5b27556 100644 --- a/tools/lib/perf/Documentation/Makefile +++ b/tools/lib/perf/Documentation/Makefile @@ -121,7 +121,7 @@ install-man: all $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); -install-html: +install-html: $(MAN_HTML) $(call QUIET_INSTALL, html) \ $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index c6d67fc9e57e..83c43dc13313 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -47,6 +47,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, */ perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); + + /* + * Empty cpu lists would eventually get opened as "any" so remove + * genuinely empty ones before they're opened in the wrong place. + */ + if (perf_cpu_map__is_empty(evsel->cpus)) { + struct perf_evsel *next = perf_evlist__next(evlist, evsel); + + perf_evlist__remove(evlist, evsel); + /* Keep idx contiguous */ + if (next) + list_for_each_entry_from(next, &evlist->entries, node) + next->idx--; + } } else if (!evsel->own_cpus || evlist->has_user_cpus || (!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { /* @@ -80,11 +94,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel, *n; evlist->needs_map_propagation = true; - perf_evlist__for_each_evsel(evlist, evsel) + list_for_each_entry_safe(evsel, n, &evlist->entries, node) __perf_evlist__propagate_maps(evlist, evsel); } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c07160953224..c475319e2e41 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include <perf/evsel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> +#include <linux/hash.h> #include <linux/list.h> #include <internal/evsel.h> #include <linux/zalloc.h> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->per_stream_periods); evsel->attr = *attr; evsel->idx = idx; evsel->leader = evsel; @@ -531,10 +533,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + free(pos); + } +} + +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) && + (evsel->attr.sample_type & PERF_SAMPLE_TID) && + evsel->attr.inherit; +} + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!per_thread) + return &sid->period; + + hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head = &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid == tid) + return &res->period; + + if (sid->evsel == NULL) + return NULL; + + res = zalloc(sizeof(struct perf_sample_id_period)); + if (res == NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid = tid; + + list_add_tail(&res->node, &sid->evsel->per_stream_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; } void perf_counts_values__scale(struct perf_counts_values *count, diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 5cd220a61962..ea78defa77d0 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; + /* The TID that the values belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the + * per_stream_periods + * @evlist:perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -34,8 +60,32 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`. Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). + */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). + */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { @@ -58,6 +108,10 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; /* @@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index 4e3a557a2f37..0a764c25c384 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -2,6 +2,7 @@ #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> +#include <ctype.h> #include <fcntl.h> #include <string.h> #include <linux/string.h> @@ -217,8 +218,40 @@ static int wait_or_whine(struct child_process *cmd, bool block) int check_if_command_finished(struct child_process *cmd) { +#ifdef __linux__ + char filename[FILENAME_MAX + 12]; + char status_line[256]; + FILE *status_file; + + /* + * Check by reading /proc/<pid>/status as calling waitpid causes + * stdout/stderr to be closed and data lost. + */ + sprintf(filename, "/proc/%d/status", cmd->pid); + status_file = fopen(filename, "r"); + if (status_file == NULL) { + /* Open failed assume finish_command was called. */ + return true; + } + while (fgets(status_line, sizeof(status_line), status_file) != NULL) { + char *p; + + if (strncmp(status_line, "State:", 6)) + continue; + + fclose(status_file); + p = status_line + 6; + while (isspace(*p)) + p++; + return *p == 'Z' ? 1 : 0; + } + /* Read failed assume finish_command was called. */ + fclose(status_file); + return 1; +#else wait_or_whine(cmd, /*block=*/false); return cmd->finished; +#endif } int finish_command(struct child_process *cmd) diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index dfac76e35ac7..c742b08815dc 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -20,8 +20,8 @@ static __noreturn inline void die(const char *err, ...) va_start(params, err); report(" Fatal: ", err, params); - exit(128); va_end(params); + exit(128); } #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c index e1f264444342..880e36df0c11 100644 --- a/tools/mm/page_owner_sort.c +++ b/tools/mm/page_owner_sort.c @@ -377,6 +377,7 @@ static char *get_comm(char *buf) if (errno != 0) { if (debug_on) fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); + free(comm_str); return NULL; } diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c index 04e9e6ba86ea..1433eff99feb 100644 --- a/tools/mm/slabinfo.c +++ b/tools/mm/slabinfo.c @@ -21,7 +21,7 @@ #include <regex.h> #include <errno.h> -#define MAX_SLABS 500 +#define MAX_SLABS 2000 #define MAX_ALIASES 500 #define MAX_NODES 1024 @@ -1228,6 +1228,8 @@ static void read_slab_dir(void) continue; switch (de->d_type) { case DT_LNK: + if (alias - aliasinfo == MAX_ALIASES) + fatal("Too many aliases\n"); alias->name = strdup(de->d_name); count = readlink(de->d_name, buffer, sizeof(buffer)-1); @@ -1242,6 +1244,8 @@ static void read_slab_dir(void) alias++; break; case DT_DIR: + if (slab - slabinfo == MAX_SLABS) + fatal("Too many slabs\n"); if (chdir(de->d_name)) fatal("Unable to access slab %s\n", slab->name); slab->name = strdup(de->d_name); @@ -1312,10 +1316,6 @@ static void read_slab_dir(void) slabs = slab - slabinfo; actual_slabs = slabs; aliases = alias - aliasinfo; - if (slabs > MAX_SLABS) - fatal("Too many slabs\n"); - if (aliases > MAX_ALIASES) - fatal("Too many aliases\n"); } static void output_slabs(void) diff --git a/tools/net/sunrpc/extract.sh b/tools/net/sunrpc/extract.sh new file mode 100755 index 000000000000..f944066f25bc --- /dev/null +++ b/tools/net/sunrpc/extract.sh @@ -0,0 +1,11 @@ +#! /bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Extract an RPC protocol specification from an RFC document. +# The version of this script comes from RFC 8166. +# +# Usage: +# $ extract.sh < rfcNNNN.txt > protocol.x +# + +grep '^ *///' | sed 's?^ */// ??' | sed 's?^ *///$??' diff --git a/tools/net/sunrpc/xdrgen/README b/tools/net/sunrpc/xdrgen/README index 92f7738ad50c..27218a78ab40 100644 --- a/tools/net/sunrpc/xdrgen/README +++ b/tools/net/sunrpc/xdrgen/README @@ -150,6 +150,23 @@ Pragma directives specify exceptions to the normal generation of encoding and decoding functions. Currently one directive is implemented: "public". +Pragma big_endian +------ ---------- + + pragma big_endian <enum> ; + +For variables that might contain only a small number values, it +is more efficient to avoid the byte-swap when encoding or decoding +on little-endian machines. Such is often the case with error status +codes. For example: + + pragma big_endian nfsstat3; + +In this case, when generating an XDR struct or union containing a +field of type "nfsstat3", xdrgen will make the type of that field +"__be32" instead of "enum nfsstat3". XDR unions then switch on the +non-byte-swapped value of that field. + Pragma exclude ------ ------- diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py index fd2457461274..b98574a36a4a 100644 --- a/tools/net/sunrpc/xdrgen/generators/__init__.py +++ b/tools/net/sunrpc/xdrgen/generators/__init__.py @@ -111,3 +111,7 @@ class SourceGenerator: def emit_encoder(self, node: _XdrAst) -> None: """Emit one encoder function for this XDR type""" raise NotImplementedError("Encoder generation not supported") + + def emit_maxsize(self, node: _XdrAst) -> None: + """Emit one maxsize macro for this XDR type""" + raise NotImplementedError("Maxsize macro generation not supported") diff --git a/tools/net/sunrpc/xdrgen/generators/enum.py b/tools/net/sunrpc/xdrgen/generators/enum.py index 855e43f4ae38..e62f715d3996 100644 --- a/tools/net/sunrpc/xdrgen/generators/enum.py +++ b/tools/net/sunrpc/xdrgen/generators/enum.py @@ -4,7 +4,7 @@ """Generate code to handle XDR enum types""" from generators import SourceGenerator, create_jinja2_environment -from xdr_ast import _XdrEnum, public_apis +from xdr_ast import _XdrEnum, public_apis, big_endian, get_header_name class XdrEnumGenerator(SourceGenerator): @@ -18,7 +18,7 @@ class XdrEnumGenerator(SourceGenerator): def emit_declaration(self, node: _XdrEnum) -> None: """Emit one declaration pair for an XDR enum type""" if node.name in public_apis: - template = self.environment.get_template("declaration/close.j2") + template = self.environment.get_template("declaration/enum.j2") print(template.render(name=node.name)) def emit_definition(self, node: _XdrEnum) -> None: @@ -30,15 +30,35 @@ class XdrEnumGenerator(SourceGenerator): for enumerator in node.enumerators: print(template.render(name=enumerator.name, value=enumerator.value)) - template = self.environment.get_template("definition/close.j2") + if node.name in big_endian: + template = self.environment.get_template("definition/close_be.j2") + else: + template = self.environment.get_template("definition/close.j2") print(template.render(name=node.name)) def emit_decoder(self, node: _XdrEnum) -> None: """Emit one decoder function for an XDR enum type""" - template = self.environment.get_template("decoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("decoder/enum_be.j2") + else: + template = self.environment.get_template("decoder/enum.j2") print(template.render(name=node.name)) def emit_encoder(self, node: _XdrEnum) -> None: """Emit one encoder function for an XDR enum type""" - template = self.environment.get_template("encoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("encoder/enum_be.j2") + else: + template = self.environment.get_template("encoder/enum.j2") print(template.render(name=node.name)) + + def emit_maxsize(self, node: _XdrEnum) -> None: + """Emit one maxsize macro for an XDR enum type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = self.environment.get_template("maxsize/enum.j2") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) diff --git a/tools/net/sunrpc/xdrgen/generators/pointer.py b/tools/net/sunrpc/xdrgen/generators/pointer.py index b0b27f1819c8..6dbda60ad2db 100644 --- a/tools/net/sunrpc/xdrgen/generators/pointer.py +++ b/tools/net/sunrpc/xdrgen/generators/pointer.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrPointer, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_pointer_declaration(environment: Environment, node: _XdrPointer) -> None: @@ -46,7 +46,7 @@ def emit_pointer_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_pointer_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_pointer_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( @@ -247,6 +247,18 @@ def emit_pointer_encoder(environment: Environment, node: _XdrPointer) -> None: print(template.render()) +def emit_pointer_maxsize(environment: Environment, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "pointer") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrPointerGenerator(SourceGenerator): """Generate source code for XDR pointer""" @@ -270,3 +282,7 @@ class XdrPointerGenerator(SourceGenerator): def emit_encoder(self, node: _XdrPointer) -> None: """Emit one encoder function for an XDR pointer type""" emit_pointer_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + emit_pointer_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/struct.py b/tools/net/sunrpc/xdrgen/generators/struct.py index b694cd470829..64911de46f62 100644 --- a/tools/net/sunrpc/xdrgen/generators/struct.py +++ b/tools/net/sunrpc/xdrgen/generators/struct.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrStruct, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_struct_declaration(environment: Environment, node: _XdrStruct) -> None: @@ -46,7 +46,7 @@ def emit_struct_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_struct_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_struct_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( @@ -247,6 +247,18 @@ def emit_struct_encoder(environment: Environment, node: _XdrStruct) -> None: print(template.render()) +def emit_struct_maxsize(environment: Environment, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "struct") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrStructGenerator(SourceGenerator): """Generate source code for XDR structs""" @@ -270,3 +282,7 @@ class XdrStructGenerator(SourceGenerator): def emit_encoder(self, node: _XdrStruct) -> None: """Emit one encoder function for an XDR struct type""" emit_struct_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + emit_struct_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/typedef.py b/tools/net/sunrpc/xdrgen/generators/typedef.py index 85a1b2303333..fab72e9d6915 100644 --- a/tools/net/sunrpc/xdrgen/generators/typedef.py +++ b/tools/net/sunrpc/xdrgen/generators/typedef.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrTypedef, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrTypedef, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrVoid, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> None: @@ -28,7 +28,7 @@ def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "declaration", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -74,7 +74,7 @@ def emit_type_definition(environment: Environment, node: _XdrDeclaration) -> Non classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "definition", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -119,7 +119,7 @@ def emit_typedef_decoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "decoder", node.template) print( template.render( @@ -180,7 +180,7 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "encoder", node.template) print( template.render( @@ -230,6 +230,18 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non raise NotImplementedError("typedef: type not recognized") +def emit_typedef_maxsize(environment: Environment, node: _XdrDeclaration) -> None: + """Emit a maxsize macro for an XDR typedef""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", node.template) + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrTypedefGenerator(SourceGenerator): """Generate source code for XDR typedefs""" @@ -253,3 +265,7 @@ class XdrTypedefGenerator(SourceGenerator): def emit_encoder(self, node: _XdrTypedef) -> None: """Emit one encoder function for an XDR typedef""" emit_typedef_encoder(self.environment, node.declaration) + + def emit_maxsize(self, node: _XdrTypedef) -> None: + """Emit one maxsize macro for an XDR typedef""" + emit_typedef_maxsize(self.environment, node.declaration) diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py index 7974967bbb9f..2cca00e279cd 100644 --- a/tools/net/sunrpc/xdrgen/generators/union.py +++ b/tools/net/sunrpc/xdrgen/generators/union.py @@ -8,8 +8,8 @@ from jinja2 import Environment from generators import SourceGenerator from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid -from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis +from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name +from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian def emit_union_declaration(environment: Environment, node: _XdrUnion) -> None: @@ -77,13 +77,18 @@ def emit_union_switch_spec_decoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_decoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_decoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit decoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "decoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "decoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "decoder", "case_spec") for case in node.values: print(template.render(case=case)) @@ -136,7 +141,11 @@ def emit_union_decoder(environment: Environment, node: _XdrUnion) -> None: emit_union_switch_spec_decoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_decoder(environment, case) + emit_union_case_spec_decoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_decoder(environment, node) @@ -153,17 +162,21 @@ def emit_union_switch_spec_encoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_encoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_encoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit encoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "encoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "encoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "encoder", "case_spec") for case in node.values: print(template.render(case=case)) - assert isinstance(node.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", node.arm.template) print( template.render( @@ -192,7 +205,6 @@ def emit_union_default_spec_encoder(environment: Environment, node: _XdrUnion) - print(template.render()) return - assert isinstance(default_case.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", default_case.arm.template) print( template.render( @@ -210,7 +222,11 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: emit_union_switch_spec_encoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_encoder(environment, case) + emit_union_case_spec_encoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_encoder(environment, node) @@ -218,6 +234,18 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: print(template.render()) +def emit_union_maxsize(environment: Environment, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "union") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrUnionGenerator(SourceGenerator): """Generate source code for XDR unions""" @@ -241,3 +269,7 @@ class XdrUnionGenerator(SourceGenerator): def emit_encoder(self, node: _XdrUnion) -> None: """Emit one encoder function for an XDR union""" emit_union_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union""" + emit_union_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/grammars/xdr.lark b/tools/net/sunrpc/xdrgen/grammars/xdr.lark index f3c4552e548d..7c2c1b8c86d1 100644 --- a/tools/net/sunrpc/xdrgen/grammars/xdr.lark +++ b/tools/net/sunrpc/xdrgen/grammars/xdr.lark @@ -3,7 +3,7 @@ declaration : "opaque" identifier "[" value "]" -> fixed_length_opaque | "opaque" identifier "<" [ value ] ">" -> variable_length_opaque - | "string" identifier "<" [ value ] ">" -> variable_length_string + | "string" identifier "<" [ value ] ">" -> string | type_specifier identifier "[" value "]" -> fixed_length_array | type_specifier identifier "<" [ value ] ">" -> variable_length_array | type_specifier "*" identifier -> optional_data @@ -87,12 +87,14 @@ procedure_def : type_specifier identifier "(" type_specifier ")" "=" c pragma_def : "pragma" directive identifier [ identifier ] ";" -directive : exclude_directive +directive : big_endian_directive + | exclude_directive | header_directive | pages_directive | public_directive | skip_directive +big_endian_directive : "big_endian" exclude_directive : "exclude" header_directive : "header" pages_directive : "pages" diff --git a/tools/net/sunrpc/xdrgen/subcmds/definitions.py b/tools/net/sunrpc/xdrgen/subcmds/definitions.py index 5cd13d53221f..c956e27f37c0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/definitions.py +++ b/tools/net/sunrpc/xdrgen/subcmds/definitions.py @@ -28,9 +28,7 @@ from xdr_parse import xdr_parser, set_xdr_annotate logger.setLevel(logging.INFO) -def emit_header_definitions( - root: Specification, language: str, peer: str -) -> None: +def emit_header_definitions(root: Specification, language: str, peer: str) -> None: """Emit header definitions""" for definition in root.definitions: if isinstance(definition.value, _XdrConstant): @@ -52,6 +50,25 @@ def emit_header_definitions( gen.emit_definition(definition.value) +def emit_header_maxsize(root: Specification, language: str, peer: str) -> None: + """Emit header maxsize macros""" + print("") + for definition in root.definitions: + if isinstance(definition.value, _XdrEnum): + gen = XdrEnumGenerator(language, peer) + elif isinstance(definition.value, _XdrPointer): + gen = XdrPointerGenerator(language, peer) + elif isinstance(definition.value, _XdrTypedef): + gen = XdrTypedefGenerator(language, peer) + elif isinstance(definition.value, _XdrStruct): + gen = XdrStructGenerator(language, peer) + elif isinstance(definition.value, _XdrUnion): + gen = XdrUnionGenerator(language, peer) + else: + continue + gen.emit_maxsize(definition.value) + + def handle_parse_error(e: UnexpectedInput) -> bool: """Simple parse error reporting, no recovery attempted""" print(e) @@ -71,6 +88,7 @@ def subcmd(args: Namespace) -> int: gen.emit_definition(args.filename, ast) emit_header_definitions(ast, args.language, args.peer) + emit_header_maxsize(ast, args.language, args.peer) gen = XdrHeaderBottomGenerator(args.language, args.peer) gen.emit_definition(args.filename, ast) diff --git a/tools/net/sunrpc/xdrgen/subcmds/source.py b/tools/net/sunrpc/xdrgen/subcmds/source.py index 00c04ad15b89..2024954748f0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/source.py +++ b/tools/net/sunrpc/xdrgen/subcmds/source.py @@ -83,8 +83,7 @@ def generate_client_source(filename: str, root: Specification, language: str) -> gen = XdrSourceTopGenerator(language, "client") gen.emit_source(filename, root) - # cel: todo: client needs XDR size macros - + print("") for definition in root.definitions: emit_source_encoder(definition.value, language, "client") for definition in root.definitions: diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 deleted file mode 100644 index ab1e576c9531..000000000000 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 +++ /dev/null @@ -1,4 +0,0 @@ -{# SPDX-License-Identifier: GPL-2.0 #} - -bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr); -bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 new file mode 100644 index 000000000000..d1405c7c5354 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 @@ -0,0 +1,4 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr); +bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 index 341d829afeda..6482984f1cb7 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr) +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) { u32 val; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 new file mode 100644 index 000000000000..44c391c10b42 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) +{ + return xdr_stream_decode_be32(xdr, ptr) == 0; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 index 9e62344a976a..a07586cbee17 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 @@ -1,2 +1,3 @@ {# SPDX-License-Identifier: GPL-2.0 #} }; +typedef enum {{ name }} {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 new file mode 100644 index 000000000000..2c18948bddf7 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +}; +typedef __be32 {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 index bd0a770e50f2..67245b9a914d 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value) +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) { return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; } diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 new file mode 100644 index 000000000000..fbbcc45948d6 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) +{ + return xdr_stream_encode_be32(xdr, value) == XDR_UNIT; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 index 12d20b143b43..12d20b143b43 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 index 2de2feec77db..2de2feec77db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 index cf65b71eaef3..cf65b71eaef3 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 index d304eccb5c40..aa9940e322db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 @@ -13,10 +13,6 @@ static int {{ program }}_xdr_dec_{{ result }}(struct rpc_rqst *req, if (!xdrgen_decode_{{ result }}(xdr, result)) return -EIO; - if (result->stat != nfs_ok) { - trace_nfs_xdr_status(xdr, (int)result->stat); - return {{ program }}_stat_to_errno(result->stat); - } {% endif %} return 0; } diff --git a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 index e3a802cbc4d7..c5518c519854 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 @@ -3,6 +3,11 @@ // XDR specification file: {{ filename }} // XDR specification modification time: {{ mtime }} -#include <linux/sunrpc/xprt.h> +#include <linux/types.h> -#include "{{ program }}xdr_gen.h" +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/xdrgen/_defs.h> +#include <linux/sunrpc/xdrgen/_builtins.h> +#include <linux/sunrpc/xdrgen/nlm4.h> + +#include <linux/sunrpc/clnt.h> diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 index 12d20b143b43..12d20b143b43 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 index 2de2feec77db..2de2feec77db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 index cf65b71eaef3..cf65b71eaef3 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 index 3fe3ddd9f359..3fe3ddd9f359 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 index 56c5a17d6a70..56c5a17d6a70 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 index c03c2df8e625..c03c2df8e625 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 index 3d490ff180d0..3d490ff180d0 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 index 83b6e5a14e7f..83b6e5a14e7f 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index dbd3fcf9c957..5233e73c7046 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -12,13 +12,50 @@ from lark.tree import Meta this_module = sys.modules[__name__] +big_endian = [] excluded_apis = [] header_name = "none" public_apis = [] -enums = set() structs = set() pass_by_reference = set() +constants = {} + + +def xdr_quadlen(val: str) -> int: + """Return integer XDR width of an XDR type""" + if val in constants: + octets = constants[val] + else: + octets = int(val) + return int((octets + 3) / 4) + + +symbolic_widths = { + "void": ["XDR_void"], + "bool": ["XDR_bool"], + "int": ["XDR_int"], + "unsigned_int": ["XDR_unsigned_int"], + "long": ["XDR_long"], + "unsigned_long": ["XDR_unsigned_long"], + "hyper": ["XDR_hyper"], + "unsigned_hyper": ["XDR_unsigned_hyper"], +} + +# Numeric XDR widths are tracked in a dictionary that is keyed +# by type_name because sometimes a caller has nothing more than +# the type_name to use to figure out the numeric width. +max_widths = { + "void": 0, + "bool": 1, + "int": 1, + "unsigned_int": 1, + "long": 1, + "unsigned_long": 1, + "hyper": 2, + "unsigned_hyper": 2, +} + @dataclass class _XdrAst(ast_utils.Ast): @@ -51,18 +88,31 @@ class _XdrTypeSpecifier(_XdrAst): """Corresponds to 'type_specifier' in the XDR language grammar""" type_name: str - c_classifier: str + c_classifier: str = "" @dataclass class _XdrDefinedType(_XdrTypeSpecifier): """Corresponds to a type defined by the input specification""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [get_header_name().upper() + "_" + self.type_name + "_sz"] + + def __post_init__(self): + if self.type_name in structs: + self.c_classifier = "struct " + symbolic_widths[self.type_name] = self.symbolic_width() + @dataclass class _XdrBuiltInType(_XdrTypeSpecifier): """Corresponds to a built-in XDR type""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.type_name] + @dataclass class _XdrDeclaration(_XdrAst): @@ -77,6 +127,18 @@ class _XdrFixedLengthOpaque(_XdrDeclaration): size: str template: str = "fixed_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_QUADLEN(" + self.size + ")"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthOpaque(_XdrDeclaration): @@ -86,14 +148,44 @@ class _XdrVariableLengthOpaque(_XdrDeclaration): maxsize: str template: str = "variable_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass -class _XdrVariableLengthString(_XdrDeclaration): +class _XdrString(_XdrDeclaration): """A (NUL-terminated) variable-length string declaration""" name: str maxsize: str - template: str = "variable_length_string" + template: str = "string" + + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass @@ -105,6 +197,19 @@ class _XdrFixedLengthArray(_XdrDeclaration): size: str template: str = "fixed_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) * max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + return ["(" + self.size + " * (" + item_width + "))"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthArray(_XdrDeclaration): @@ -115,6 +220,22 @@ class _XdrVariableLengthArray(_XdrDeclaration): maxsize: str template: str = "variable_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + (xdr_quadlen(self.maxsize) * max_widths[self.spec.type_name]) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + widths.append("(" + self.maxsize + " * (" + item_width + "))") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrOptionalData(_XdrDeclaration): @@ -124,6 +245,20 @@ class _XdrOptionalData(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "optional_data" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_bool"] + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrBasic(_XdrDeclaration): @@ -133,13 +268,34 @@ class _XdrBasic(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "basic" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.spec.type_name] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVoid(_XdrDeclaration): """A void declaration""" + name: str = "void" template: str = "void" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 0 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [] + @dataclass class _XdrConstant(_XdrAst): @@ -148,6 +304,10 @@ class _XdrConstant(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnumerator(_XdrAst): @@ -156,6 +316,10 @@ class _XdrEnumerator(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnum(_XdrAst): @@ -166,6 +330,18 @@ class _XdrEnum(_XdrAst): maximum: int enumerators: List[_XdrEnumerator] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_int"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrStruct(_XdrAst): @@ -174,6 +350,26 @@ class _XdrStruct(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 0 + for field in self.fields: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + for field in self.fields: + widths += field.symbolic_width() + return widths + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrPointer(_XdrAst): @@ -182,6 +378,27 @@ class _XdrPointer(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 1 + for field in self.fields[0:-1]: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + widths += ["XDR_bool"] + for field in self.fields[0:-1]: + widths += field.symbolic_width() + return widths + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrTypedef(_XdrAst): @@ -189,6 +406,23 @@ class _XdrTypedef(_XdrAst): declaration: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return self.declaration.max_width() + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return self.declaration.symbolic_width() + + def __post_init__(self): + if isinstance(self.declaration, _XdrBasic): + new_type = self.declaration + if isinstance(new_type.spec, _XdrDefinedType): + if new_type.spec.type_name in pass_by_reference: + pass_by_reference.add(new_type.name) + max_widths[new_type.name] = self.max_width() + symbolic_widths[new_type.name] = self.symbolic_width() + @dataclass class _XdrCaseSpec(_XdrAst): @@ -216,6 +450,36 @@ class _XdrUnion(_XdrAst): cases: List[_XdrCaseSpec] default: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + return 1 + max_width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + width = case.arm.symbolic_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + width = self.default.arm.symbolic_width() + return symbolic_widths[self.discriminant.name] + width + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _RpcProcedure(_XdrAst): @@ -290,24 +554,13 @@ class ParseToAst(Transformer): return _XdrConstantValue(value) def type_specifier(self, children): - """Instantiate one type_specifier object""" - c_classifier = "" + """Instantiate one _XdrTypeSpecifier object""" if isinstance(children[0], _XdrIdentifier): name = children[0].symbol - if name in enums: - c_classifier = "enum " - if name in structs: - c_classifier = "struct " - return _XdrDefinedType( - type_name=name, - c_classifier=c_classifier, - ) - - token = children[0].data - return _XdrBuiltInType( - type_name=token.value, - c_classifier=c_classifier, - ) + return _XdrDefinedType(type_name=name) + + name = children[0].data.value + return _XdrBuiltInType(type_name=name) def constant_def(self, children): """Instantiate one _XdrConstant object""" @@ -320,7 +573,6 @@ class ParseToAst(Transformer): def enum(self, children): """Instantiate one _XdrEnum object""" enum_name = children[0].symbol - enums.add(enum_name) i = 0 enumerators = [] @@ -350,15 +602,15 @@ class ParseToAst(Transformer): return _XdrVariableLengthOpaque(name, maxsize) - def variable_length_string(self, children): - """Instantiate one _XdrVariableLengthString declaration object""" + def string(self, children): + """Instantiate one _XdrString declaration object""" name = children[0].symbol if children[1] is not None: maxsize = children[1].value else: maxsize = "0" - return _XdrVariableLengthString(name, maxsize) + return _XdrString(name, maxsize) def fixed_length_array(self, children): """Instantiate one _XdrFixedLengthArray declaration object""" @@ -383,8 +635,6 @@ class ParseToAst(Transformer): """Instantiate one _XdrOptionalData declaration object""" spec = children[0] name = children[1].symbol - structs.add(name) - pass_by_reference.add(name) return _XdrOptionalData(name, spec) @@ -403,8 +653,6 @@ class ParseToAst(Transformer): def struct(self, children): """Instantiate one _XdrStruct object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) fields = children[1].children last_field = fields[-1] @@ -419,11 +667,6 @@ class ParseToAst(Transformer): def typedef(self, children): """Instantiate one _XdrTypedef object""" new_type = children[0] - if isinstance(new_type, _XdrBasic) and isinstance( - new_type.spec, _XdrDefinedType - ): - if new_type.spec.type_name in pass_by_reference: - pass_by_reference.add(new_type.name) return _XdrTypedef(new_type) @@ -445,8 +688,6 @@ class ParseToAst(Transformer): def union(self, children): """Instantiate one _XdrUnion object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) body = children[1] discriminant = body.children[0].children[0] @@ -484,6 +725,8 @@ class ParseToAst(Transformer): """Instantiate one _Pragma object""" directive = children[0].children[0].data match directive: + case "big_endian_directive": + big_endian.append(children[1].symbol) case "exclude_directive": excluded_apis.append(children[1].symbol) case "header_directive": diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen index 95f303b2861b..43762be39252 100755 --- a/tools/net/sunrpc/xdrgen/xdrgen +++ b/tools/net/sunrpc/xdrgen/xdrgen @@ -128,5 +128,7 @@ There is NO WARRANTY, to the extent permitted by law.""", try: if __name__ == "__main__": sys.exit(main()) -except (SystemExit, KeyboardInterrupt, BrokenPipeError): +except SystemExit: + sys.exit(0) +except (KeyboardInterrupt, BrokenPipeError): sys.exit(1) diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index d1cdf2a8f826..211df5a93ad9 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -1,5 +1,17 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.arch + +INSTALL ?= install +prefix ?= /usr +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif +libdir ?= $(prefix)/$(libdir_relative) +includedir ?= $(prefix)/include + SUBDIRS = lib generated samples all: $(SUBDIRS) libynl.a @@ -21,5 +33,20 @@ clean distclean: fi \ done rm -f libynl.a + rm -rf pyynl/__pycache__ + rm -rf pyynl/lib/__pycache__ + rm -rf pyynl.egg-info + rm -rf build + +install: libynl.a lib/*.h + @echo -e "\tINSTALL libynl.a" + @$(INSTALL) -d $(DESTDIR)$(libdir) + @$(INSTALL) -m 0644 libynl.a $(DESTDIR)$(libdir)/libynl.a + @echo -e "\tINSTALL libynl headers" + @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl + @$(INSTALL) -m 0644 lib/*.h $(DESTDIR)$(includedir)/ynl/ + @echo -e "\tINSTALL pyynl" + @pip install --prefix=$(DESTDIR)$(prefix) . + @make -C generated install -.PHONY: all clean distclean $(SUBDIRS) +.PHONY: all clean distclean install $(SUBDIRS) diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore index ade488626d26..859a6fb446e1 100644 --- a/tools/net/ynl/generated/.gitignore +++ b/tools/net/ynl/generated/.gitignore @@ -1,2 +1,3 @@ *-user.c *-user.h +*.rst diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 7db5240de58a..21f9e299dc75 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -7,32 +7,44 @@ ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif +INSTALL ?= install +prefix ?= /usr +datarootdir ?= $(prefix)/share +docdir ?= $(datarootdir)/doc +includedir ?= $(prefix)/include + include ../Makefile.deps YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \ --exclude-op stats-get -TOOL:=../ynl-gen-c.py +TOOL:=../pyynl/ynl_gen_c.py +TOOL_RST:=../pyynl/ynl_gen_rst.py +SPECS_DIR:=../../../../Documentation/netlink/specs GENS_PATHS=$(shell grep -nrI --files-without-match \ 'protocol: netlink' \ - ../../../../Documentation/netlink/specs/) -GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS}) + $(SPECS_DIR)) +GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS}) SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) -all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) +SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml) +SPECS=$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS}) +RSTS=$(patsubst %,%.rst,${SPECS}) + +all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) $(RSTS) protos.a: $(OBJS) @echo -e "\tAR $@" @ar rcs $@ $(OBJS) -%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) +%-user.h: $(SPECS_DIR)/%.yaml $(TOOL) @echo -e "\tGEN $@" @$(TOOL) --mode user --header --spec $< -o $@ $(YNL_GEN_ARG_$*) -%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) +%-user.c: $(SPECS_DIR)/%.yaml $(TOOL) @echo -e "\tGEN $@" @$(TOOL) --mode user --source --spec $< -o $@ $(YNL_GEN_ARG_$*) @@ -40,14 +52,37 @@ protos.a: $(OBJS) @echo -e "\tCC $@" @$(COMPILE.c) $(CFLAGS_$*) -o $@ $< +%.rst: $(SPECS_DIR)/%.yaml $(TOOL_RST) + @echo -e "\tGEN_RST $@" + @$(TOOL_RST) -o $@ -i $< + clean: rm -f *.o distclean: clean - rm -f *.c *.h *.a + rm -f *.c *.h *.a *.rst regen: @../ynl-regen.sh -.PHONY: all clean distclean regen +install-headers: $(HDRS) + @echo -e "\tINSTALL generated headers" + @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl + @$(INSTALL) -m 0644 *.h $(DESTDIR)$(includedir)/ynl/ + +install-rsts: $(RSTS) + @echo -e "\tINSTALL generated docs" + @$(INSTALL) -d $(DESTDIR)$(docdir)/ynl + @$(INSTALL) -m 0644 $(RSTS) $(DESTDIR)$(docdir)/ynl/ + +install-specs: + @echo -e "\tINSTALL specs" + @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl + @$(INSTALL) -m 0644 ../../../../Documentation/netlink/*.yaml $(DESTDIR)$(datarootdir)/ynl/ + @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl/specs + @$(INSTALL) -m 0644 $(SPECS_DIR)/*.yaml $(DESTDIR)$(datarootdir)/ynl/specs/ + +install: install-headers install-rsts install-specs + +.PHONY: all clean distclean regen install install-headers install-rsts install-specs .DEFAULT_GOAL: all diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore index 296c4035dbf2..a4383358ec72 100644 --- a/tools/net/ynl/lib/.gitignore +++ b/tools/net/ynl/lib/.gitignore @@ -1,2 +1 @@ -__pycache__/ *.d diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index 94c49cca3dca..4b2b98704ff9 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -19,7 +19,6 @@ ynl.a: $(OBJS) clean: rm -f *.o *.d *~ - rm -rf __pycache__ distclean: clean rm -f *.a diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index e16cef160bc2..ce32cb35007d 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, ynl_attr_for_each_payload(start, data_len, attr) { astart_off = (char *)attr - (char *)start; - aend_off = astart_off + ynl_attr_data_len(attr); + aend_off = (char *)ynl_attr_data_end(attr) - (char *)start; if (aend_off <= off) continue; diff --git a/tools/net/ynl/pyproject.toml b/tools/net/ynl/pyproject.toml new file mode 100644 index 000000000000..a81d8779b0e0 --- /dev/null +++ b/tools/net/ynl/pyproject.toml @@ -0,0 +1,24 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyynl" +authors = [ + {name = "Donald Hunter", email = "donald.hunter@gmail.com"}, + {name = "Jakub Kicinski", email = "kuba@kernel.org"}, +] +description = "yaml netlink (ynl)" +version = "0.0.1" +requires-python = ">=3.9" +dependencies = [ + "pyyaml==6.*", + "jsonschema==4.*" +] + +[tool.setuptools.packages.find] +include = ["pyynl", "pyynl.lib"] + +[project.scripts] +ynl = "pyynl.cli:main" +ynl-ethtool = "pyynl.ethtool:main" diff --git a/tools/net/ynl/pyynl/.gitignore b/tools/net/ynl/pyynl/.gitignore new file mode 100644 index 000000000000..b801cd2d016e --- /dev/null +++ b/tools/net/ynl/pyynl/.gitignore @@ -0,0 +1,2 @@ +__pycache__/ +lib/__pycache__/ diff --git a/tools/net/ynl/pyynl/__init__.py b/tools/net/ynl/pyynl/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/net/ynl/pyynl/__init__.py diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/pyynl/cli.py index 41d9fa5c818d..794e3c7dcc65 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -3,6 +3,7 @@ import argparse import json +import os import pathlib import pprint import sys @@ -10,6 +11,24 @@ import sys sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily, Netlink, NlError +sys_schema_dir='/usr/share/ynl' +relative_schema_dir='../../../../Documentation/netlink' + +def schema_dir(): + script_dir = os.path.dirname(os.path.abspath(__file__)) + schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}") + if not os.path.isdir(schema_dir): + schema_dir = sys_schema_dir + if not os.path.isdir(schema_dir): + raise Exception(f"Schema directory {schema_dir} does not exist") + return schema_dir + +def spec_dir(): + spec_dir = schema_dir() + '/specs' + if not os.path.isdir(spec_dir): + raise Exception(f"Spec directory {spec_dir} does not exist") + return spec_dir + class YnlEncoder(json.JSONEncoder): def default(self, obj): @@ -32,7 +51,14 @@ def main(): parser = argparse.ArgumentParser(description=description, epilog=epilog) - parser.add_argument('--spec', dest='spec', type=str, required=True) + spec_group = parser.add_mutually_exclusive_group(required=True) + spec_group.add_argument('--family', dest='family', type=str, + help='name of the netlink FAMILY') + spec_group.add_argument('--list-families', action='store_true', + help='list all netlink families supported by YNL (has spec)') + spec_group.add_argument('--spec', dest='spec', type=str, + help='choose the family by SPEC file path') + parser.add_argument('--schema', dest='schema', type=str) parser.add_argument('--no-schema', action='store_true') parser.add_argument('--json', dest='json_text', type=str) @@ -70,6 +96,12 @@ def main(): else: pprint.PrettyPrinter().pprint(msg) + if args.list_families: + for filename in sorted(os.listdir(spec_dir())): + if filename.endswith('.yaml'): + print(filename.removesuffix('.yaml')) + return + if args.no_schema: args.schema = '' @@ -77,7 +109,16 @@ def main(): if args.json_text: attrs = json.loads(args.json_text) - ynl = YnlFamily(args.spec, args.schema, args.process_unknown, + if args.family: + spec = f"{spec_dir()}/{args.family}.yaml" + if args.schema is None and spec.startswith(sys_schema_dir): + args.schema = '' # disable schema validation when installed + else: + spec = args.spec + if not os.path.isfile(spec): + raise Exception(f"Spec file {spec} does not exist") + + ynl = YnlFamily(spec, args.schema, args.process_unknown, recv_size=args.dbg_small_recv) if args.dbg_small_recv: ynl.set_recv_dbg(True) diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index ebb0a11f67bf..af7fddd7b085 100755 --- a/tools/net/ynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -11,6 +11,7 @@ import os sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily +from cli import schema_dir, spec_dir def args_to_req(ynl, op_name, args, req): """ @@ -156,10 +157,8 @@ def main(): args = parser.parse_args() script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0])) - spec = os.path.join(script_abs_dir, - '../../../Documentation/netlink/specs/ethtool.yaml') - schema = os.path.join(script_abs_dir, - '../../../Documentation/netlink/genetlink-legacy.yaml') + spec = os.path.join(spec_dir(), 'ethtool.yaml') + schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml') ynl = YnlFamily(spec, schema) diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py index 9137b83e580a..9137b83e580a 100644 --- a/tools/net/ynl/lib/__init__.py +++ b/tools/net/ynl/pyynl/lib/__init__.py diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py index a745739655ad..314ec8007496 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/pyynl/lib/nlspec.py @@ -219,7 +219,10 @@ class SpecAttrSet(SpecElement): else: real_set = family.attr_sets[self.subset_of] for elem in self.yaml['attributes']: - attr = real_set[elem['name']] + real_attr = real_set[elem['name']] + combined_elem = real_attr.yaml | elem + attr = self.new_attr(combined_elem, real_attr.value) + self.attrs[attr.name] = attr self.attrs_by_val[attr.value] = attr diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 01ec01a90e76..08f8bf89cfc2 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -556,10 +556,10 @@ class YnlFamily(SpecFamily): if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' - sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) + sub_space = attr['nested-attributes'] + sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], - subname, subvalue, sub_attrs) + attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs) elif attr["type"] == 'flag': if not value: # If value is absent or false then skip attribute creation. @@ -733,41 +733,45 @@ class YnlFamily(SpecFamily): self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr)) continue - if attr_spec["type"] == 'nest': - subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) - decoded = subdict - elif attr_spec["type"] == 'string': - decoded = attr.as_strz() - elif attr_spec["type"] == 'binary': - decoded = self._decode_binary(attr, attr_spec) - elif attr_spec["type"] == 'flag': - decoded = True - elif attr_spec.is_auto_scalar: - decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order) - elif attr_spec["type"] in NlAttr.type_formats: - decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) - if 'enum' in attr_spec: - decoded = self._decode_enum(decoded, attr_spec) - elif attr_spec.display_hint: - decoded = self._formatted_string(decoded, attr_spec.display_hint) - elif attr_spec["type"] == 'indexed-array': - decoded = self._decode_array_attr(attr, attr_spec) - elif attr_spec["type"] == 'bitfield32': - value, selector = struct.unpack("II", attr.raw) - if 'enum' in attr_spec: - value = self._decode_enum(value, attr_spec) - selector = self._decode_enum(selector, attr_spec) - decoded = {"value": value, "selector": selector} - elif attr_spec["type"] == 'sub-message': - decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) - elif attr_spec["type"] == 'nest-type-value': - decoded = self._decode_nest_type_value(attr, attr_spec) - else: - if not self.process_unknown: - raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') - decoded = self._decode_unknown(attr) - - self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded) + try: + if attr_spec["type"] == 'nest': + subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) + decoded = subdict + elif attr_spec["type"] == 'string': + decoded = attr.as_strz() + elif attr_spec["type"] == 'binary': + decoded = self._decode_binary(attr, attr_spec) + elif attr_spec["type"] == 'flag': + decoded = True + elif attr_spec.is_auto_scalar: + decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order) + elif attr_spec["type"] in NlAttr.type_formats: + decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) + if 'enum' in attr_spec: + decoded = self._decode_enum(decoded, attr_spec) + elif attr_spec.display_hint: + decoded = self._formatted_string(decoded, attr_spec.display_hint) + elif attr_spec["type"] == 'indexed-array': + decoded = self._decode_array_attr(attr, attr_spec) + elif attr_spec["type"] == 'bitfield32': + value, selector = struct.unpack("II", attr.raw) + if 'enum' in attr_spec: + value = self._decode_enum(value, attr_spec) + selector = self._decode_enum(selector, attr_spec) + decoded = {"value": value, "selector": selector} + elif attr_spec["type"] == 'sub-message': + decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) + elif attr_spec["type"] == 'nest-type-value': + decoded = self._decode_nest_type_value(attr, attr_spec) + else: + if not self.process_unknown: + raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') + decoded = self._decode_unknown(attr) + + self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded) + except: + print(f"Error decoding '{attr_spec.name}' from '{space}'") + raise return rsp diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index d8201c4b1520..c2eabc90dce8 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -79,6 +79,20 @@ class Type(SpecAttr): self.enum_name = None delattr(self, "enum_name") + def _get_real_attr(self): + # if the attr is for a subset return the "real" attr (just one down, does not recurse) + return self.family.attr_sets[self.attr_set.subset_of][self.name] + + def set_request(self): + self.request = True + if self.attr_set.subset_of: + self._get_real_attr().set_request() + + def set_reply(self): + self.reply = True + if self.attr_set.subset_of: + self._get_real_attr().set_reply() + def get_limit(self, limit, default=None): value = self.checks.get(limit, default) if value is None: @@ -106,6 +120,10 @@ class Type(SpecAttr): enum_name = f"{self.attr_set.name_prefix}{self.name}" self.enum_name = c_upper(enum_name) + if self.attr_set.subset_of: + if self.checks != self._get_real_attr().checks: + raise Exception("Overriding checks not supported by codegen, yet") + def is_multi_val(self): return None @@ -801,6 +819,8 @@ class EnumSet(SpecEnumSet): self.user_type = 'int' self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") + self.header = yaml.get('header', None) + self.enum_cnt_name = yaml.get('enum-cnt-name', None) super().__init__(family, yaml) @@ -1117,17 +1137,17 @@ class Family(SpecFamily): for _, struct in self.pure_nested_structs.items(): if struct.request: for _, arg in struct.member_list(): - arg.request = True + arg.set_request() if struct.reply: for _, arg in struct.member_list(): - arg.reply = True + arg.set_reply() for root_set, rs_members in self.root_sets.items(): for attr, spec in self.attr_sets[root_set].items(): if attr in rs_members['request']: - spec.request = True + spec.set_request() if attr in rs_members['reply']: - spec.reply = True + spec.set_reply() def _load_global_policy(self): global_set = set() @@ -1763,7 +1783,14 @@ def parse_rsp_nested(ri, struct): f'{struct.ptr_name}dst = yarg->data;'] init_lines = [] - _multi_parse(ri, struct, init_lines, local_vars) + if struct.member_list(): + _multi_parse(ri, struct, init_lines, local_vars) + else: + # Empty nest + ri.cw.block_start() + ri.cw.p('return 0;') + ri.cw.block_end() + ri.cw.nl() def parse_rsp_msg(ri, deref=False): @@ -2384,6 +2411,17 @@ def print_kernel_family_struct_src(family, cw): if not kernel_can_gen_family_struct(family): return + if 'sock-priv' in family.kernel_family: + # Generate "trampolines" to make CFI happy + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_init", + [f"{family.c_name}_nl_sock_priv_init(priv);"], + ["void *priv"]) + cw.nl() + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_destroy", + [f"{family.c_name}_nl_sock_priv_destroy(priv);"], + ["void *priv"]) + cw.nl() + cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =") cw.p('.name\t\t= ' + family.fam_key + ',') cw.p('.version\t= ' + family.ver_key + ',') @@ -2401,9 +2439,8 @@ def print_kernel_family_struct_src(family, cw): cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') if 'sock-priv' in family.kernel_family: cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),') - # Force cast here, actual helpers take pointer to the real type. - cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,') - cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,') + cw.p(f'.sock_priv_init\t= __{family.c_name}_nl_sock_priv_init,') + cw.p(f'.sock_priv_destroy = __{family.c_name}_nl_sock_priv_destroy,') cw.block_end(';') @@ -2417,6 +2454,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): cw.block_start(line=start_line) +def render_uapi_unified(family, cw, max_by_define, separate_ntf): + max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) + cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) + max_value = f"({cnt_name} - 1)" + + uapi_enum_start(family, cw, family['operations'], 'enum-name') + val = 0 + for op in family.msgs.values(): + if separate_ntf and ('notify' in op or 'event' in op): + continue + + suffix = ',' + if op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + +def render_uapi_directional(family, cw, max_by_define): + max_name = f"{family.op_prefix}USER_MAX" + cnt_name = f"__{family.op_prefix}USER_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if 'do' in op and 'event' not in op: + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + max_name = f"{family.op_prefix}KERNEL_MAX" + cnt_name = f"__{family.op_prefix}KERNEL_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op: + enum_name = op.enum_name + if 'event' not in op and 'notify' not in op: + enum_name = f'{enum_name}_REPLY' + + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + def render_uapi(family, cw): hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H" hdr_prot = hdr_prot.replace('/', '_') @@ -2440,6 +2558,9 @@ def render_uapi(family, cw): if const['type'] == 'enum' or const['type'] == 'flags': enum = family.consts[const['name']] + if enum.header: + continue + if enum.has_doc(): if enum.has_entry_doc(): cw.p('/**') @@ -2472,9 +2593,12 @@ def render_uapi(family, cw): max_val = f' = {enum.get_mask()},' cw.p(max_name + max_val) else: + cnt_name = enum.enum_cnt_name max_name = c_upper(name_pfx + 'max') - cw.p('__' + max_name + ',') - cw.p(max_name + ' = (__' + max_name + ' - 1)') + if not cnt_name: + cnt_name = '__' + name_pfx + 'max' + cw.p(c_upper(cnt_name) + ',') + cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)') cw.block_end(line=';') cw.nl() elif const['type'] == 'const': @@ -2503,7 +2627,8 @@ def render_uapi(family, cw): val = attr.value val += 1 cw.p(attr.enum_name + suffix) - cw.nl() + if attr_set.items(): + cw.nl() cw.p(attr_set.cnt_name + ('' if max_by_define else ',')) if not max_by_define: cw.p(f"{attr_set.max_name} = {max_value}") @@ -2515,30 +2640,12 @@ def render_uapi(family, cw): # Commands separate_ntf = 'async-prefix' in family['operations'] - max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) - cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) - max_value = f"({cnt_name} - 1)" - - uapi_enum_start(family, cw, family['operations'], 'enum-name') - val = 0 - for op in family.msgs.values(): - if separate_ntf and ('notify' in op or 'event' in op): - continue - - suffix = ',' - if op.value != val: - suffix = f" = {op.value}," - val = op.value - cw.p(op.enum_name + suffix) - val += 1 - cw.nl() - cw.p(cnt_name + ('' if max_by_define else ',')) - if not max_by_define: - cw.p(f"{max_name} = {max_value}") - cw.block_end(line=';') - if max_by_define: - cw.p(f"#define {max_name} {max_value}") - cw.nl() + if family.msg_id_model == 'unified': + render_uapi_unified(family, cw, max_by_define, separate_ntf) + elif family.msg_id_model == 'directional': + render_uapi_directional(family, cw, max_by_define) + else: + raise Exception(f'Unsupported message enum-model {family.msg_id_model}') if separate_ntf: uapi_enum_start(family, cw, family['operations'], enum_name='async-enum') @@ -2635,7 +2742,8 @@ def find_kernel_root(full_path): def main(): parser = argparse.ArgumentParser(description='Netlink simple parsing generator') - parser.add_argument('--mode', dest='mode', type=str, required=True) + parser.add_argument('--mode', dest='mode', type=str, required=True, + choices=('user', 'kernel', 'uapi')) parser.add_argument('--spec', dest='spec', type=str, required=True) parser.add_argument('--header', dest='header', action='store_true', default=None) parser.add_argument('--source', dest='header', action='store_false') @@ -2662,13 +2770,6 @@ def main(): os.sys.exit(1) return - supported_models = ['unified'] - if args.mode in ['user', 'kernel']: - supported_models += ['directional'] - if parsed.msg_id_model not in supported_models: - print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') - os.sys.exit(1) - cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) _, spec_kernel = find_kernel_root(args.spec) @@ -2696,7 +2797,10 @@ def main(): cw.p('#define ' + hdr_prot) cw.nl() - hdr_file=os.path.basename(args.out_file[:-2]) + ".h" + if args.out_file: + hdr_file = os.path.basename(args.out_file[:-2]) + ".h" + else: + hdr_file = "generated_header_file.h" if args.mode == 'kernel': cw.p('#include <net/netlink.h>') @@ -2718,12 +2822,17 @@ def main(): else: cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') - headers = [parsed.uapi_header] + headers = [] for definition in parsed['definitions']: if 'header' in definition: headers.append(definition['header']) + if args.mode == 'user': + headers.append(parsed.uapi_header) + seen_header = [] for one in headers: - cw.p(f"#include <{one}>") + if one not in seen_header: + cw.p(f"#include <{one}>") + seen_header.append(one) cw.nl() if args.mode == "user": diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py index 6c56d0d726b4..6c56d0d726b4 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/pyynl/ynl_gen_rst.py diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index a37304dcc88e..81b4ecd89100 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause -TOOL=$(dirname $(realpath $0))/ynl-gen-c.py +TOOL=$(dirname $(realpath $0))/pyynl/ynl_gen_c.py force= search= diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c index 9bba1e9318e0..87230ed570fd 100644 --- a/tools/objtool/arch/loongarch/special.c +++ b/tools/objtool/arch/loongarch/special.c @@ -9,7 +9,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, } struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { return NULL; } diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c index d33868147196..51610689abf7 100644 --- a/tools/objtool/arch/powerpc/special.c +++ b/tools/objtool/arch/powerpc/special.c @@ -13,7 +13,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, } struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { exit(-1); } diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 4ea0f9815fda..9c1c9df09aaa 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -109,7 +109,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps. */ struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn) + struct instruction *insn, + unsigned long *table_size) { struct reloc *text_reloc, *rodata_reloc; struct section *table_sec; @@ -158,5 +159,6 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, if (reloc_type(text_reloc) == R_X86_64_PC32) file->ignore_unreachables = true; + *table_size = 0; return rodata_reloc; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f7586f82b967..753dbc4f8198 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -150,6 +150,15 @@ static inline struct reloc *insn_jump_table(struct instruction *insn) return NULL; } +static inline unsigned long insn_jump_table_size(struct instruction *insn) +{ + if (insn->type == INSN_JUMP_DYNAMIC || + insn->type == INSN_CALL_DYNAMIC) + return insn->_jump_table_size; + + return 0; +} + static bool is_jump_table_jump(struct instruction *insn) { struct alt_group *alt_group = insn->alt_group; @@ -614,108 +623,6 @@ static int init_pv_ops(struct objtool_file *file) return 0; } -static struct instruction *find_last_insn(struct objtool_file *file, - struct section *sec) -{ - struct instruction *insn = NULL; - unsigned int offset; - unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0; - - for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) - insn = find_insn(file, sec, offset); - - return insn; -} - -/* - * Mark "ud2" instructions and manually annotated dead ends. - */ -static int add_dead_ends(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - uint64_t offset; - - /* - * Check for manually annotated dead ends. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!rsec) - goto reachable; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = true; - } - -reachable: - /* - * These manually annotated reachable checks are needed for GCC 4.4, - * where the Linux unreachable() macro isn't supported. In that case - * GCC doesn't know the "ud2" is fatal, so it generates code as if it's - * not a dead end. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = false; - } - - return 0; -} - static int create_static_call_sections(struct objtool_file *file) { struct static_call_site *site; @@ -1310,40 +1217,6 @@ static void add_uaccess_safe(struct objtool_file *file) } /* - * FIXME: For now, just ignore any alternatives which add retpolines. This is - * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. - * But it at least allows objtool to understand the control flow *around* the - * retpoline. - */ -static int add_ignore_alternatives(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - - rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.ignore_alts entry"); - return -1; - } - - insn->ignore_alts = true; - } - - return 0; -} - -/* * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol * will be added to the .retpoline_sites section. */ @@ -2073,6 +1946,7 @@ out: static int add_jump_table(struct objtool_file *file, struct instruction *insn, struct reloc *next_table) { + unsigned long table_size = insn_jump_table_size(insn); struct symbol *pfunc = insn_func(insn)->pfunc; struct reloc *table = insn_jump_table(insn); struct instruction *dest_insn; @@ -2087,6 +1961,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, for_each_reloc_from(table->sec, reloc) { /* Check for the end of the table: */ + if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size) + break; if (reloc != table && reloc == next_table) break; @@ -2131,12 +2007,12 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, * find_jump_table() - Given a dynamic jump, find the switch jump table * associated with it. */ -static struct reloc *find_jump_table(struct objtool_file *file, - struct symbol *func, - struct instruction *insn) +static void find_jump_table(struct objtool_file *file, struct symbol *func, + struct instruction *insn) { struct reloc *table_reloc; struct instruction *dest_insn, *orig_insn = insn; + unsigned long table_size; /* * Backward search using the @first_jump_src links, these help avoid @@ -2157,17 +2033,17 @@ static struct reloc *find_jump_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; - table_reloc = arch_find_switch_table(file, insn); + table_reloc = arch_find_switch_table(file, insn, &table_size); if (!table_reloc) continue; dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc)); if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func) continue; - return table_reloc; + orig_insn->_jump_table = table_reloc; + orig_insn->_jump_table_size = table_size; + break; } - - return NULL; } /* @@ -2178,7 +2054,6 @@ static void mark_func_jump_tables(struct objtool_file *file, struct symbol *func) { struct instruction *insn, *last = NULL; - struct reloc *reloc; func_for_each_insn(file, func, insn) { if (!last) @@ -2201,9 +2076,7 @@ static void mark_func_jump_tables(struct objtool_file *file, if (insn->type != INSN_JUMP_DYNAMIC) continue; - reloc = find_jump_table(file, func, insn); - if (reloc) - insn->_jump_table = reloc; + find_jump_table(file, func, insn); } } @@ -2373,185 +2246,147 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } -static int read_noendbr_hints(struct objtool_file *file) +static int read_annotate(struct objtool_file *file, + int (*func)(struct objtool_file *file, int type, struct instruction *insn)) { + struct section *sec; struct instruction *insn; - struct section *rsec; struct reloc *reloc; + uint64_t offset; + int type, ret; - rsec = find_section_by_name(file->elf, ".rela.discard.noendbr"); - if (!rsec) + sec = find_section_by_name(file->elf, ".discard.annotate_insn"); + if (!sec) return 0; - for_each_reloc(rsec, reloc) { - insn = find_insn(file, reloc->sym->sec, - reloc->sym->offset + reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.noendbr entry"); - return -1; - } + if (!sec->rsec) + return 0; - insn->noendbr = 1; + if (sec->sh.sh_entsize != 8) { + static bool warned = false; + if (!warned) { + WARN("%s: dodgy linker, sh_entsize != 8", sec->name); + warned = true; + } + sec->sh.sh_entsize = 8; } - return 0; -} - -static int read_retpoline_hints(struct objtool_file *file) -{ - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); - if (!rsec) - return 0; + for_each_reloc(sec->rsec, reloc) { + type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + offset = reloc->sym->offset + reloc_addend(reloc); + insn = find_insn(file, reloc->sym->sec, offset); - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); if (!insn) { - WARN("bad .discard.retpoline_safe entry"); + WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); return -1; } - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN && - insn->type != INSN_NOP) { - WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); - return -1; - } - - insn->retpoline_safe = true; + ret = func(file, type, insn); + if (ret < 0) + return ret; } return 0; } -static int read_instr_hints(struct objtool_file *file) +static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; + switch (type) { + case ANNOTYPE_IGNORE_ALTS: + insn->ignore_alts = true; + break; - rsec = find_section_by_name(file->elf, ".rela.discard.instr_end"); - if (!rsec) - return 0; + /* + * Must be before read_unwind_hints() since that needs insn->noendbr. + */ + case ANNOTYPE_NOENDBR: + insn->noendbr = 1; + break; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + default: + break; + } - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); - return -1; - } + return 0; +} - insn->instr--; - } +static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn) +{ + unsigned long dest_off; - rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin"); - if (!rsec) + if (type != ANNOTYPE_INTRA_FUNCTION_CALL) return 0; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + if (insn->type != INSN_CALL) { + WARN_INSN(insn, "intra_function_call not a direct call"); + return -1; + } - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_begin entry"); - return -1; - } + /* + * Treat intra-function CALLs as JMPs, but with a stack_op. + * See add_call_destinations(), which strips stack_ops from + * normal CALLs. + */ + insn->type = INSN_JUMP_UNCONDITIONAL; - insn->instr++; + dest_off = arch_jump_destination(insn); + insn->jump_dest = find_insn(file, insn->sec, dest_off); + if (!insn->jump_dest) { + WARN_INSN(insn, "can't find call dest at %s+0x%lx", + insn->sec->name, dest_off); + return -1; } return 0; } -static int read_validate_unret_hints(struct objtool_file *file) +static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } + switch (type) { + case ANNOTYPE_NOENDBR: + /* early */ + break; - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); + case ANNOTYPE_RETPOLINE_SAFE: + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); return -1; } - insn->unret = 1; - } - - return 0; -} - -static int read_intra_function_calls(struct objtool_file *file) -{ - struct instruction *insn; - struct section *rsec; - struct reloc *reloc; + insn->retpoline_safe = true; + break; - rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls"); - if (!rsec) - return 0; + case ANNOTYPE_INSTR_BEGIN: + insn->instr++; + break; - for_each_reloc(rsec, reloc) { - unsigned long dest_off; + case ANNOTYPE_INSTR_END: + insn->instr--; + break; - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", - rsec->name); - return -1; - } + case ANNOTYPE_UNRET_BEGIN: + insn->unret = 1; + break; - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.intra_function_call entry"); - return -1; - } + case ANNOTYPE_IGNORE_ALTS: + /* early */ + break; - if (insn->type != INSN_CALL) { - WARN_INSN(insn, "intra_function_call not a direct call"); - return -1; - } + case ANNOTYPE_INTRA_FUNCTION_CALL: + /* ifc */ + break; - /* - * Treat intra-function CALLs as JMPs, but with a stack_op. - * See add_call_destinations(), which strips stack_ops from - * normal CALLs. - */ - insn->type = INSN_JUMP_UNCONDITIONAL; + case ANNOTYPE_REACHABLE: + insn->dead_end = false; + break; - dest_off = arch_jump_destination(insn); - insn->jump_dest = find_insn(file, insn->sec, dest_off); - if (!insn->jump_dest) { - WARN_INSN(insn, "can't find call dest at %s+0x%lx", - insn->sec->name, dest_off); - return -1; - } + default: + WARN_INSN(insn, "Unknown annotation type: %d", type); + break; } return 0; @@ -2666,14 +2501,7 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); add_uaccess_safe(file); - ret = add_ignore_alternatives(file); - if (ret) - return ret; - - /* - * Must be before read_unwind_hints() since that needs insn->noendbr. - */ - ret = read_noendbr_hints(file); + ret = read_annotate(file, __annotate_early); if (ret) return ret; @@ -2695,7 +2523,7 @@ static int decode_sections(struct objtool_file *file) * Must be before add_call_destination(); it changes INSN_CALL to * INSN_JUMP. */ - ret = read_intra_function_calls(file); + ret = read_annotate(file, __annotate_ifc); if (ret) return ret; @@ -2703,14 +2531,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be after add_call_destinations() such that it can override - * dead_end_function() marks. - */ - ret = add_dead_ends(file); - if (ret) - return ret; - ret = add_jump_table_alts(file); if (ret) return ret; @@ -2719,15 +2539,11 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_retpoline_hints(file); - if (ret) - return ret; - - ret = read_instr_hints(file); - if (ret) - return ret; - - ret = read_validate_unret_hints(file); + /* + * Must be after add_call_destinations() such that it can override + * dead_end_function() marks. + */ + ret = read_annotate(file, __annotate_late); if (ret) return ret; @@ -3820,9 +3636,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_CONTEXT_SWITCH: - if (func && (!next_insn || !next_insn->hint)) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; + if (func) { + if (!next_insn || !next_insn->hint) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + break; } return 0; @@ -4573,6 +4392,8 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__jump_table") || !strcmp(sec->name, "__mcount_loc") || !strcmp(sec->name, ".kcfi_traps") || + !strcmp(sec->name, ".llvm.call-graph-profile") || + !strcmp(sec->name, ".llvm_bb_addr_map") || !strcmp(sec->name, "__tracepoints") || strstr(sec->name, "__patchable_function_entries")) continue; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 3d27983dc908..6f64d611faea 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -224,12 +224,17 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset) if (n) return 0; /* not a hole */ - /* didn't find a symbol for which @offset is after it */ - if (!hole.sym) - return 0; /* not a hole */ + /* + * @offset >= sym->offset + sym->len, find symbol after it. + * When hole.sym is empty, use the first node to compute the hole. + * If there is no symbol in the section, the first node will be NULL, + * in which case, -1 is returned to skip the whole section. + */ + if (hole.sym) + n = rb_next(&hole.sym->node); + else + n = rb_first_cached(&sec->symbol_tree); - /* @offset >= sym->offset + sym->len, find symbol after it */ - n = rb_next(&hole.sym->node); if (!n) return -1; /* until end of address space */ diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index daa46f1f0965..e1cd13cd28a3 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -71,7 +71,10 @@ struct instruction { struct instruction *first_jump_src; union { struct symbol *_call_dest; - struct reloc *_jump_table; + struct { + struct reloc *_jump_table; + unsigned long _jump_table_size; + }; }; struct alternative *alts; struct symbol *sym; diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index 86d4af9c5aa9..e7ee7ffccefd 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -38,5 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, struct instruction *insn, struct reloc *reloc); struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn); + struct instruction *insn, + unsigned long *table_size); #endif /* _SPECIAL_H */ diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index e7da92489167..b2174894f9f7 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -11,7 +11,6 @@ NORETURN(__ia32_sys_exit) NORETURN(__ia32_sys_exit_group) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) -NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) @@ -20,6 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) +NORETURN(bch2_trans_unlocked_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 470258009ddc..7b530d838d40 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -95,7 +95,7 @@ static int run_test(struct pci_test *test) if (test->msinum > 0 && test->msinum <= 32) { ret = ioctl(fd, PCITEST_MSI, test->msinum); - fprintf(stdout, "MSI%d:\t\t", test->msinum); + fprintf(stdout, "MSI%u:\t\t", test->msinum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -104,7 +104,7 @@ static int run_test(struct pci_test *test) if (test->msixnum > 0 && test->msixnum <= 2048) { ret = ioctl(fd, PCITEST_MSIX, test->msixnum); - fprintf(stdout, "MSI-X%d:\t\t", test->msixnum); + fprintf(stdout, "MSI-X%u:\t\t", test->msixnum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -116,7 +116,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_WRITE, ¶m); - fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size); + fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -128,7 +128,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_READ, ¶m); - fprintf(stdout, "READ (%7ld bytes):\t\t", test->size); + fprintf(stdout, "READ (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -140,7 +140,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_COPY, ¶m); - fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size); + fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f5b81d439387..5aaf73df6700 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,9 +39,9 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log -tests/shell/*.shellcheck_log -tests/shell/coresight/*.shellcheck_log -tests/shell/lib/*.shellcheck_log +pmu-events/empty-pmu-events.log +pmu-events/test-empty-pmu-events.c +*.shellcheck_log feature/ libapi/ libbpf/ diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 19cc179be9a7..40476b227f8d 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -1,6 +1,6 @@ i synthesize instructions events y synthesize cycles events - b synthesize branches events (branch misses for Arm SPE) + b synthesize branches events c synthesize branches events (calls only) r synthesize branches events (returns only) x synthesize transactions events diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt index 0a3eda482307..37afade4f1b2 100644 --- a/tools/perf/Documentation/perf-arm-spe.txt +++ b/tools/perf/Documentation/perf-arm-spe.txt @@ -150,6 +150,7 @@ arm_spe/load_filter=1,min_latency=10/' pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege store_filter=1 - collect stores only (PMSFCR.ST) ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS) + discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD) +++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather than only the execution latency. @@ -187,7 +188,7 @@ groups: 7 llc-access 2 tlb-miss 1K tlb-access - 36 branch-miss + 36 branch 0 remote-access 900 memory @@ -220,6 +221,31 @@ Common errors Increase sampling interval (see above) +PMU events +~~~~~~~~~~ + +SPE has events that can be counted on core PMUs. These are prefixed with +SAMPLE_, for example SAMPLE_POP, SAMPLE_FEED, SAMPLE_COLLISION and +SAMPLE_FEED_BR. + +These events will only count when an SPE event is running on the same core that +the PMU event is opened on, otherwise they read as 0. There are various ways to +ensure that the PMU event and SPE event are scheduled together depending on the +way the event is opened. For example opening both events as per-process events +on the same process, although it's not guaranteed that the PMU event is enabled +first when context switching. For that reason it may be better to open the PMU +event as a systemwide event and then open SPE on the process of interest. + +Discard mode +~~~~~~~~~~~~ + +SPE related (SAMPLE_* etc) core PMU events can be used without the overhead of +collecting sample data if discard mode is supported (optional from Armv8.6). +First run a system wide SPE session (or on the core of interest) using options +to minimize output. Then run perf stat: + + perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null & + perf stat -e SAMPLE_FEED_LD SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt index 10f69fb6850b..31741499e786 100644 --- a/tools/perf/Documentation/perf-check.txt +++ b/tools/perf/Documentation/perf-check.txt @@ -47,15 +47,15 @@ feature:: bpf / HAVE_LIBBPF_SUPPORT bpf_skeletons / HAVE_BPF_SKEL debuginfod / HAVE_DEBUGINFOD_SUPPORT - dwarf / HAVE_DWARF_SUPPORT - dwarf_getlocations / HAVE_DWARF_GETLOCATIONS_SUPPORT + dwarf / HAVE_LIBDW_SUPPORT + dwarf_getlocations / HAVE_LIBDW_SUPPORT dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT auxtrace / HAVE_AUXTRACE_SUPPORT libaudit / HAVE_LIBAUDIT_SUPPORT libbfd / HAVE_LIBBFD_SUPPORT libcapstone / HAVE_LIBCAPSTONE_SUPPORT libcrypto / HAVE_LIBCRYPTO_SUPPORT - libdw-dwarf-unwind / HAVE_DWARF_SUPPORT + libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT libelf / HAVE_LIBELF_SUPPORT libnuma / HAVE_LIBNUMA_SUPPORT libopencsd / HAVE_CSTRACE_SUPPORT diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 379f9d7a8ab1..1f668d4724e3 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -247,6 +247,19 @@ annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. + annotate.disassemblers:: + Choose the disassembler to use: "objdump", "llvm", "capstone", + if not specified it will first try, if available, the "llvm" one, + then, if it fails, "capstone", and finally the original "objdump" + based one. + + Choosing a different one is useful when handling some feature that + is known to be best support at some point by one of the options, + to compare the output when in doubt about some bug, etc. + + This can be a list, in order of preference, the first one that works + finishes the process. + annotate.addr2line:: addr2line binary to use for file names and line numbers. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index dea005410ec0..d0c65fad419a 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] +'perf list' [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob] DESCRIPTION @@ -243,6 +243,21 @@ For accessing trace point events perf needs to have read access to /sys/kernel/tracing, even when perf_event_paranoid is in a relaxed setting. +TOOL/HWMON EVENTS +----------------- + +Some events don't have an associated PMU instead reading values +available to software without perf_event_open. As these events don't +support sampling they can only really be read by tools like perf stat. + +Tool events provide times and certain system parameters. Examples +include duration_time, user_time, system_time and num_cpus_online. + +Hwmon events provide easy access to hwmon sysfs data typically in +/sys/class/hwmon. This information includes temperatures, fan speeds +and energy usage. + + TRACING ------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 7c66d81ab978..87f864519406 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -391,6 +391,14 @@ OPTIONS This allows to examine the path the program took to each sample. The data collection must have used -b (or -j) and -g. + Also show with some branch flags that can be: + - Predicted: display the average percentage of predicated branches. + (predicated number / total number) + - Abort: display the number of tsx aborted branches. + - Cycles: cycles in basic block. + + - iterations: display the average number of iterations in callchain list. + --addr2line=<path>:: Path to addr2line binary. diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 3db64954a267..6dbbddb6464d 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -221,6 +221,14 @@ OPTIONS for 'perf sched timehist' priorities are specified with -: 120-129. A combination of both can also be provided: 0,120-129. +-P:: +--pre-migrations:: + Show pre-migration wait time. pre-migration wait time is the time spent + by a task waiting on a runqueue but not getting the chance to run there + and is migrated to a different runqueue where it is finally run. This + time between sched_wakeup and migrate_task is the pre-migration wait + time. + OPTIONS for 'perf sched replay' ------------------------------ diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 13e37e9385ee..27a1cac6fe76 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -624,7 +624,7 @@ as perf_trace_context.perf_script_context . perf_set_itrace_options(context, itrace_options) - set --itrace options if they have not been set already perf_sample_srcline(context) - returns source_file_name, line_number perf_sample_srccode(context) - returns source_file_name, line_number, source_line - + perf_config_get(config_name) - returns the value of the named config item, or None if unset Util.py Module ~~~~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 9acb8d1f6588..efcdec528a8f 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -48,3 +48,20 @@ OPTIONS --dso:: Specify a DSO for the "Symbols" test. + +-w:: +--workload=:: + Run a built-in workload, to list them use '--list-workloads', current ones include: + noploop, thloop, leafloop, sqrtloop, brstack, datasym and landlock. + + Used with the shell script regression tests. + + Some accept an extra parameter: + + seconds: leafloop, noploop, sqrtloop, thloop + nrloops: brstack + + The datasym and landlock workloads don't accept any. + +--list-workloads:: + List the available workloads to use with -w/--workload. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d4332675babb..2916d59c88cd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -31,7 +31,7 @@ $(call detected_var,SRCARCH) ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 1 - ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch riscv)) NO_SYSCALL_TABLE := 0 endif @@ -83,6 +83,10 @@ ifeq ($(ARCH),mips) LIBUNWIND_LIBS = -lunwind -lunwind-mips endif +ifeq ($(ARCH),riscv) + CFLAGS += -I$(OUTPUT)arch/riscv/include/generated +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -91,6 +95,10 @@ ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv loon NO_LIBDW_DWARF_UNWIND := 1 endif +ifneq ($(LIBUNWIND),1) + NO_LIBUNWIND := 1 +endif + ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 endif @@ -162,8 +170,8 @@ ifeq ($(findstring -static,${LDFLAGS}),-static) # Must put -ldl after -lebl for dependency DWARFLIBS += -ldl endif -FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) -FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) +FEATURE_CHECK_CFLAGS-libdw := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw := $(LIBDW_LDFLAGS) $(DWARFLIBS) # for linking with debug library, run like: # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ @@ -203,10 +211,6 @@ FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arc # include ARCH specific config -include $(src-perf)/arch/$(SRCARCH)/Makefile -ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET - CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -endif - include $(srctree)/tools/scripts/utilities.mak ifeq ($(call get-executable,$(FLEX)),) @@ -426,7 +430,7 @@ ifeq ($(feature-file-handle), 1) endif ifdef NO_LIBELF - NO_DWARF := 1 + NO_LIBDW := 1 NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 @@ -461,28 +465,11 @@ else endif endif else - ifndef NO_LIBDW_DWARF_UNWIND - ifneq ($(feature-libdw-dwarf-unwind),1) - NO_LIBDW_DWARF_UNWIND := 1 - $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR) + ifneq ($(feature-libdw), 1) + ifndef NO_LIBDW + $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.157, disables dwarf support. Please install new elfutils-devel/libdw-dev) + NO_LIBDW := 1 endif - endif - ifneq ($(feature-dwarf), 1) - ifndef NO_DWARF - $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev) - NO_DWARF := 1 - endif - else - ifneq ($(feature-dwarf_getlocations), 1) - $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157) - else - CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT - endif # dwarf_getlocations - ifneq ($(feature-dwarf_getcfi), 1) - $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142) - else - CFLAGS += -DHAVE_DWARF_CFI_SUPPORT - endif # dwarf_getcfi endif # Dwarf support endif # libelf support endif # NO_LIBELF @@ -493,7 +480,7 @@ ifeq ($(feature-libaio), 1) endif endif -ifdef NO_DWARF +ifdef NO_LIBDW NO_LIBDW_DWARF_UNWIND := 1 endif @@ -571,17 +558,12 @@ ifndef NO_LIBELF endif endif - ifndef NO_DWARF - ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled) - NO_DWARF := 1 - else - CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) - LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += ${DWARFLIBS} - $(call detected,CONFIG_DWARF) - endif # PERF_HAVE_DWARF_REGS - endif # NO_DWARF + ifndef NO_LIBDW + CFLAGS += -DHAVE_LIBDW_SUPPORT $(LIBDW_CFLAGS) + LDFLAGS += $(LIBDW_LDFLAGS) + EXTLIBS += ${DWARFLIBS} + $(call detected,CONFIG_LIBDW) + endif # NO_LIBDW ifndef NO_LIBBPF ifeq ($(feature-bpf), 1) @@ -630,7 +612,7 @@ ifdef PERF_HAVE_JITDUMP endif ifeq ($(SRCARCH),powerpc) - ifndef NO_DWARF + ifndef NO_LIBDW CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif endif @@ -750,8 +732,6 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT $(call detected,CONFIG_DWARF_UNWIND) -else - NO_DWARF_UNWIND := 1 endif ifndef NO_LOCAL_LIBUNWIND @@ -1194,7 +1174,7 @@ endif ifneq ($(NO_LIBTRACEEVENT),1) $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) - CFLAGS += -DHAVE_LIBTRACEEVENT + CFLAGS += -DHAVE_LIBTRACEEVENT $(shell $(PKG_CONFIG) --cflags libtraceevent) LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtraceevent) EXTLIBS += $(shell $(PKG_CONFIG) --libs-only-l libtraceevent) LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent).0.0 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9dd2e8d3f3c9..d74241a15131 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -40,7 +40,7 @@ include ../scripts/utilities.mak # # Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS. # -# Define NO_DWARF if you do not want debug-info analysis feature at all. +# Define NO_LIBDW if you do not want debug-info analysis feature at all. # # Define WERROR=0 to disable treating any warnings as errors. # @@ -52,7 +52,7 @@ include ../scripts/utilities.mak # # Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) # -# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# Define LIBUNWIND if you do not want libunwind dependency for dwarf # backtrace post unwind. # # Define NO_BACKTRACE if you do not want stack backtrace debug feature @@ -1128,12 +1128,11 @@ endif install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ - $(INSTALL) tests/attr.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ - $(INSTALL) tests/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/attr'; \ + $(INSTALL) tests/shell/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ $(INSTALL) tests/shell/lib/*.sh -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ $(INSTALL) tests/shell/lib/*.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ diff --git a/tools/perf/arch/arc/annotate/instructions.c b/tools/perf/arch/arc/annotate/instructions.c index 2f00e995c7e3..e5619770a1af 100644 --- a/tools/perf/arch/arc/annotate/instructions.c +++ b/tools/perf/arch/arc/annotate/instructions.c @@ -5,5 +5,7 @@ static int arc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { arch->initialized = true; arch->objdump.comment_char = ';'; + arch->e_machine = EM_ARC; + arch->e_flags = 0; return 0; } diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 1d88fdab13bf..8b59ce8efb89 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,5 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index 2ff6cedeb9c5..cf91a43362b0 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -53,6 +53,8 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = arm__associate_instruction_ops; arch->objdump.comment_char = ';'; arch->objdump.skip_functions_char = '+'; + arch->e_machine = EM_ARM; + arch->e_flags = 0; return 0; out_free_call: diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e6dd7cd79ebd..f7a8b37d1c68 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,7 +1,5 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o - perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c deleted file mode 100644 index fc5f71c91802..000000000000 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Will Deacon, ARM Ltd. - */ - -#include <stddef.h> -#include <linux/stringify.h> -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%r##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} - -/* - * Reference: - * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - REG_DWARFNUM_NAME("%fp", 11), - REG_DWARFNUM_NAME("%ip", 12), - REG_DWARFNUM_NAME("%sp", 13), - REG_DWARFNUM_NAME("%lr", 14), - REG_DWARFNUM_NAME("%pc", 15), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 5735ed4479bb..91570d5d428e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif PERF_HAVE_JITDUMP := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 HAVE_KVM_STAT_SUPPORT := 1 # diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index f86d9f4798bd..d465d093e7eb 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -113,6 +113,8 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = arm64__associate_instruction_ops; arch->objdump.comment_char = '/'; arch->objdump.skip_functions_char = '+'; + arch->e_machine = EM_AARCH64; + arch->e_flags = 0; return 0; out_free_call: diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 343ef7589a77..a74521b79eaa 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -4,7 +4,6 @@ perf-util-y += perf_regs.o perf-util-y += tsc.o perf-util-y += pmu.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2be99fdf997d..22b19dcc6beb 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -23,9 +23,12 @@ #include "../../../util/debug.h" #include "../../../util/auxtrace.h" #include "../../../util/record.h" +#include "../../../util/header.h" #include "../../../util/arm-spe.h" #include <tools/libc_compat.h> // reallocarray +#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -37,11 +40,84 @@ struct arm_spe_recording { bool *wrapped; }; +/* + * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke + * perf_cpu_map__put() to release the map after use. + */ +static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist) +{ + struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); + struct perf_cpu_map *intersect_cpus; + + /* cpu map is not "any" CPU , we have specific CPUs to work with */ + if (!perf_cpu_map__has_any_cpu(event_cpus)) { + intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus); + perf_cpu_map__put(online_cpus); + /* Event can be "any" CPU so count all CPUs. */ + } else { + intersect_cpus = online_cpus; + } + + return intersect_cpus; +} + static size_t arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused) + struct evlist *evlist) +{ + struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist); + size_t size; + + if (!cpu_map) + return 0; + + size = ARM_SPE_AUXTRACE_PRIV_MAX + + ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map); + size *= sizeof(u64); + + perf_cpu_map__put(cpu_map); + return size; +} + +static int arm_spe_save_cpu_header(struct auxtrace_record *itr, + struct perf_cpu cpu, __u64 data[]) { - return ARM_SPE_AUXTRACE_PRIV_SIZE; + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *pmu = NULL; + char *cpuid = NULL; + u64 val; + + /* Read CPU MIDR */ + cpuid = get_cpuid_allow_env_override(cpu); + if (!cpuid) + return -ENOMEM; + val = strtol(cpuid, NULL, 16); + + data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC; + data[ARM_SPE_CPU] = cpu.cpu; + data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR; + data[ARM_SPE_CPU_MIDR] = val; + + /* Find the associate Arm SPE PMU for the CPU */ + if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu)) + pmu = sper->arm_spe_pmu; + + if (!pmu) { + /* No Arm SPE PMU is found */ + data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX; + data[ARM_SPE_CAP_MIN_IVAL] = 0; + } else { + data[ARM_SPE_CPU_PMU_TYPE] = pmu->type; + + if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1) + val = 0; + data[ARM_SPE_CAP_MIN_IVAL] = val; + } + + free(cpuid); + return ARM_SPE_CPU_PRIV_MAX; } static int arm_spe_info_fill(struct auxtrace_record *itr, @@ -49,20 +125,46 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, struct perf_record_auxtrace_info *auxtrace_info, size_t priv_size) { + int i, ret; + size_t offset; struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_cpu_map *cpu_map; + struct perf_cpu cpu; + __u64 *data; - if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) + if (priv_size != arm_spe_info_priv_size(itr, session->evlist)) return -EINVAL; if (!session->evlist->core.nr_mmaps) return -EINVAL; + cpu_map = arm_spe_find_cpus(session->evlist); + if (!cpu_map) + return -EINVAL; + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; - auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION; + auxtrace_info->priv[ARM_SPE_HEADER_SIZE] = + ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION; + auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map); + + offset = ARM_SPE_AUXTRACE_PRIV_MAX; + perf_cpu_map__for_each_cpu(cpu, i, cpu_map) { + assert(offset < priv_size); + data = &auxtrace_info->priv[offset]; + ret = arm_spe_save_cpu_header(itr, cpu, data); + if (ret < 0) + goto out; + offset += ret; + } - return 0; + ret = 0; +out: + perf_cpu_map__put(cpu_map); + return ret; } static void @@ -188,9 +290,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, evlist__for_each_entry(evlist, evsel) { if (evsel__is_aux_event(evsel)) { - if (!strstarts(evsel->pmu_name, ARM_SPE_PMU_NAME)) { + if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) { pr_err("Found unexpected auxtrace event: %s\n", - evsel->pmu_name); + evsel->pmu->name); return -EINVAL; } opts->full_auxtrace = true; diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c deleted file mode 100644 index 917b97d7c5d3..000000000000 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Will Deacon, ARM Ltd. - */ - -#include <errno.h> -#include <stddef.h> -#include <string.h> -#include <dwarf-regs.h> -#include <linux/ptrace.h> /* for struct user_pt_regs */ -#include <linux/stringify.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%x##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} -#define DWARFNUM2OFFSET(index) \ - (index * sizeof((struct user_pt_regs *)0)->regs[0]) - -/* - * Reference: - * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - GPR_DWARFNUM_NAME(11), - GPR_DWARFNUM_NAME(12), - GPR_DWARFNUM_NAME(13), - GPR_DWARFNUM_NAME(14), - GPR_DWARFNUM_NAME(15), - GPR_DWARFNUM_NAME(16), - GPR_DWARFNUM_NAME(17), - GPR_DWARFNUM_NAME(18), - GPR_DWARFNUM_NAME(19), - GPR_DWARFNUM_NAME(20), - GPR_DWARFNUM_NAME(21), - GPR_DWARFNUM_NAME(22), - GPR_DWARFNUM_NAME(23), - GPR_DWARFNUM_NAME(24), - GPR_DWARFNUM_NAME(25), - GPR_DWARFNUM_NAME(26), - GPR_DWARFNUM_NAME(27), - GPR_DWARFNUM_NAME(28), - GPR_DWARFNUM_NAME(29), - REG_DWARFNUM_NAME("%lr", 30), - REG_DWARFNUM_NAME("%sp", 31), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return DWARFNUM2OFFSET(roff->dwarfnum); - return -EINVAL; -} diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index 741df3614a09..f445a2dd6293 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -14,73 +14,66 @@ #define MIDR_REVISION_MASK GENMASK(3, 0) #define MIDR_VARIANT_MASK GENMASK(23, 20) -static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) +static int _get_cpuid(char *buf, size_t sz, struct perf_cpu cpu) { + char path[PATH_MAX]; + FILE *file; const char *sysfs = sysfs__mountpoint(); - struct perf_cpu cpu; - int idx, ret = EINVAL; + assert(cpu.cpu != -1); if (!sysfs || sz < MIDR_SIZE) return EINVAL; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - char path[PATH_MAX]; - FILE *file; - - scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, - sysfs, cpu.cpu); - - file = fopen(path, "r"); - if (!file) { - pr_debug("fopen failed for file %s\n", path); - continue; - } - - if (!fgets(buf, MIDR_SIZE, file)) { - fclose(file); - continue; - } - fclose(file); + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, sysfs, cpu.cpu); - /* got midr break loop */ - ret = 0; - break; + file = fopen(path, "r"); + if (!file) { + pr_debug("fopen failed for file %s\n", path); + return EINVAL; } - return ret; + if (!fgets(buf, MIDR_SIZE, file)) { + pr_debug("Failed to read file %s\n", path); + fclose(file); + return EINVAL; + } + fclose(file); + return 0; } -int get_cpuid(char *buf, size_t sz) +int get_cpuid(char *buf, size_t sz, struct perf_cpu cpu) { - struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); - int ret; + struct perf_cpu_map *cpus; + int idx; + if (cpu.cpu != -1) + return _get_cpuid(buf, sz, cpu); + + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return EINVAL; - ret = _get_cpuid(buf, sz, cpus); - - perf_cpu_map__put(cpus); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + int ret = _get_cpuid(buf, sz, cpu); - return ret; + if (ret == 0) + return 0; + } + return EINVAL; } -char *get_cpuid_str(struct perf_pmu *pmu) +char *get_cpuid_str(struct perf_cpu cpu) { - char *buf = NULL; + char *buf = malloc(MIDR_SIZE); int res; - if (!pmu || !pmu->cpus) - return NULL; - - buf = malloc(MIDR_SIZE); if (!buf) return NULL; /* read midr from list of cpus mapped to this pmu */ - res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus); + res = get_cpuid(buf, MIDR_SIZE, cpu); if (res) { - pr_err("failed to get cpuid string for PMU %s\n", pmu->name); + pr_err("failed to get cpuid string for CPU %d\n", cpu.cpu); free(buf); buf = NULL; } diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 2a4eab2d160e..895fb0d0610c 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,30 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include <internal/cpumap.h> -#include "../../../util/cpumap.h" -#include "../../../util/header.h" #include "../../../util/pmu.h" #include "../../../util/pmus.h" +#include "../../../util/tool_pmu.h" #include <api/fs/fs.h> -#include <math.h> -const struct pmu_metrics_table *pmu_metrics_table__find(void) -{ - struct perf_pmu *pmu; - - /* Metrics aren't currently supported on heterogeneous Arm systems */ - if (perf_pmus__num_core_pmus() > 1) - return NULL; - - /* Doesn't matter which one here because they'll all be the same */ - pmu = perf_pmus__find_core_pmu(); - if (pmu) - return perf_pmu__find_metrics_table(pmu); - - return NULL; -} - -double perf_pmu__cpu_slots_per_cycle(void) +u64 tool_pmu__cpu_slots_per_cycle(void) { char path[PATH_MAX]; unsigned long long slots = 0; @@ -41,5 +22,5 @@ double perf_pmu__cpu_slots_per_cycle(void) filename__read_ull(path, &slots); } - return slots ? (double)slots : NAN; + return slots; } diff --git a/tools/perf/arch/csky/Makefile b/tools/perf/arch/csky/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/csky/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/csky/annotate/instructions.c b/tools/perf/arch/csky/annotate/instructions.c index 5337bfb7d5fc..14270311d215 100644 --- a/tools/perf/arch/csky/annotate/instructions.c +++ b/tools/perf/arch/csky/annotate/instructions.c @@ -43,6 +43,11 @@ static int csky__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->objdump.comment_char = '/'; arch->associate_instruction_ops = csky__associate_ins_ops; - + arch->e_machine = EM_CSKY; +#if defined(__CSKYABIV2__) + arch->e_flags = EF_CSKY_ABIV2; +#else + arch->e_flags = EF_CSKY_ABIV1; +#endif return 0; } diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build index 99d83f41bf43..5e6ea82c4202 100644 --- a/tools/perf/arch/csky/util/Build +++ b/tools/perf/arch/csky/util/Build @@ -1,4 +1,3 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index c89d6bb6b184..52544d59245b 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index ab43b1ab51e3..70262d5f1444 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -131,6 +131,8 @@ int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = loongarch__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_LOONGARCH; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index b6b97de48233..0aa31986ecb5 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,7 +1,6 @@ perf-util-y += header.o perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/dwarf-regs.c b/tools/perf/arch/loongarch/util/dwarf-regs.c deleted file mode 100644 index 0f6ebc387463..000000000000 --- a/tools/perf/arch/loongarch/util/dwarf-regs.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2020-2023 Loongson Technology Corporation Limited - */ - -#include <stdio.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -static struct pt_regs_dwarfnum loongarch_gpr_table[] = { - {"%r0", 0}, {"%r1", 1}, {"%r2", 2}, {"%r3", 3}, - {"%r4", 4}, {"%r5", 5}, {"%r6", 6}, {"%r7", 7}, - {"%r8", 8}, {"%r9", 9}, {"%r10", 10}, {"%r11", 11}, - {"%r12", 12}, {"%r13", 13}, {"%r14", 14}, {"%r15", 15}, - {"%r16", 16}, {"%r17", 17}, {"%r18", 18}, {"%r19", 19}, - {"%r20", 20}, {"%r21", 21}, {"%r22", 22}, {"%r23", 23}, - {"%r24", 24}, {"%r25", 25}, {"%r26", 26}, {"%r27", 27}, - {"%r28", 28}, {"%r29", 29}, {"%r30", 30}, {"%r31", 31}, - {NULL, 0} -}; - -const char *get_arch_regstr(unsigned int n) -{ - n %= 32; - return loongarch_gpr_table[n].name; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = loongarch_gpr_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->dwarfnum; - return -EINVAL; -} diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c index d962dff55512..0c6d823334a2 100644 --- a/tools/perf/arch/loongarch/util/header.c +++ b/tools/perf/arch/loongarch/util/header.c @@ -70,7 +70,7 @@ out_free: return cpuid; } -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { int ret = 0; char *cpuid = _get_cpuid(); @@ -90,7 +90,7 @@ out_free: return ret; } -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return _get_cpuid(); } diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile index cd0b011b3be5..827168f1077a 100644 --- a/tools/perf/arch/mips/Makefile +++ b/tools/perf/arch/mips/Makefile @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - # Syscall table generation for perf out := $(OUTPUT)arch/mips/include/generated/asm header := $(out)/syscalls_n64.c diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c index 340993f2a897..b50b46c613d6 100644 --- a/tools/perf/arch/mips/annotate/instructions.c +++ b/tools/perf/arch/mips/annotate/instructions.c @@ -40,6 +40,8 @@ int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = mips__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_MIPS; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1464c6be6eb3..c844cd5cda62 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -377,3 +377,7 @@ 460 n64 lsm_set_self_attr sys_lsm_set_self_attr 461 n64 lsm_list_modules sys_lsm_list_modules 462 n64 mseal sys_mseal +463 n64 setxattrat sys_setxattrat +464 n64 getxattrat sys_getxattrat +465 n64 listxattrat sys_listxattrat +466 n64 removexattrat sys_removexattrat diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build index e4644f1e68a0..691fa2051958 100644 --- a/tools/perf/arch/mips/util/Build +++ b/tools/perf/arch/mips/util/Build @@ -1,3 +1,2 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c deleted file mode 100644 index 25c13a91c2a7..000000000000 --- a/tools/perf/arch/mips/util/dwarf-regs.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2013 Cavium, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <stdio.h> -#include <dwarf-regs.h> - -static const char *mips_gpr_names[32] = { - "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", - "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", - "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", - "$30", "$31" -}; - -const char *get_arch_regstr(unsigned int n) -{ - if (n < 32) - return mips_gpr_names[n]; - if (n == 64) - return "hi"; - if (n == 65) - return "lo"; - return NULL; -} diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index bf6d323574f6..dc8f4fb8e324 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,10 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 # diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c index ede9eeade0ab..ca567cfdcbdb 100644 --- a/tools/perf/arch/powerpc/annotate/instructions.c +++ b/tools/perf/arch/powerpc/annotate/instructions.c @@ -255,7 +255,7 @@ static struct ins_ops *check_ppc_insn(struct disasm_line *dl) * is moved to r31. update_insn_state_powerpc tracks these state * changes */ -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static void update_insn_state_powerpc(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die * cu_die __maybe_unused, struct disasm_line *dl) @@ -300,7 +300,7 @@ static void update_insn_state_powerpc(struct type_state *state, insn_offset, src->reg1, dst->reg1); pr_debug_type_name(&tsr->type, tsr->kind); } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { @@ -309,6 +309,8 @@ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = powerpc__associate_instruction_ops; arch->objdump.comment_char = '#'; annotate_opts.show_asm_raw = true; + arch->e_machine = EM_PPC; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index ebae8415dfbb..d8b4ab78bef0 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -553,3 +553,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 6c588ecdf3bd..ed82715080f9 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -7,8 +7,7 @@ perf-util-y += sym-handling.o perf-util-y += evsel.o perf-util-y += event.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_DWARF) += skip-callchain-idx.o +perf-util-$(CONFIG_LIBDW) += skip-callchain-idx.o perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c deleted file mode 100644 index 104c7ae5c433..000000000000 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Ian Munsie, IBM Corporation. - */ - -#include <stddef.h> -#include <errno.h> -#include <string.h> -#include <dwarf-regs.h> -#include <linux/ptrace.h> -#include <linux/kernel.h> -#include <linux/stringify.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; - unsigned int ptregs_offset; -}; - -#define REG_DWARFNUM_NAME(r, num) \ - {.name = __stringify(%)__stringify(r), .dwarfnum = num, \ - .ptregs_offset = offsetof(struct pt_regs, r)} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%gpr##num), .dwarfnum = num, \ - .ptregs_offset = offsetof(struct pt_regs, gpr[num])} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} - -/* - * Reference: - * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - GPR_DWARFNUM_NAME(11), - GPR_DWARFNUM_NAME(12), - GPR_DWARFNUM_NAME(13), - GPR_DWARFNUM_NAME(14), - GPR_DWARFNUM_NAME(15), - GPR_DWARFNUM_NAME(16), - GPR_DWARFNUM_NAME(17), - GPR_DWARFNUM_NAME(18), - GPR_DWARFNUM_NAME(19), - GPR_DWARFNUM_NAME(20), - GPR_DWARFNUM_NAME(21), - GPR_DWARFNUM_NAME(22), - GPR_DWARFNUM_NAME(23), - GPR_DWARFNUM_NAME(24), - GPR_DWARFNUM_NAME(25), - GPR_DWARFNUM_NAME(26), - GPR_DWARFNUM_NAME(27), - GPR_DWARFNUM_NAME(28), - GPR_DWARFNUM_NAME(29), - GPR_DWARFNUM_NAME(30), - GPR_DWARFNUM_NAME(31), - REG_DWARFNUM_NAME(msr, 66), - REG_DWARFNUM_NAME(ctr, 109), - REG_DWARFNUM_NAME(link, 108), - REG_DWARFNUM_NAME(xer, 101), - REG_DWARFNUM_NAME(dar, 119), - REG_DWARFNUM_NAME(dsisr, 118), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->ptregs_offset; - return -EINVAL; -} - -#define PPC_OP(op) (((op) >> 26) & 0x3F) -#define PPC_RA(a) (((a) >> 16) & 0x1f) -#define PPC_RT(t) (((t) >> 21) & 0x1f) -#define PPC_RB(b) (((b) >> 11) & 0x1f) -#define PPC_D(D) ((D) & 0xfffe) -#define PPC_DS(DS) ((DS) & 0xfffc) -#define OP_LD 58 -#define OP_STD 62 - -static int get_source_reg(u32 raw_insn) -{ - return PPC_RA(raw_insn); -} - -static int get_target_reg(u32 raw_insn) -{ - return PPC_RT(raw_insn); -} - -static int get_offset_opcode(u32 raw_insn) -{ - int opcode = PPC_OP(raw_insn); - - /* DS- form */ - if ((opcode == OP_LD) || (opcode == OP_STD)) - return PPC_DS(raw_insn); - else - return PPC_D(raw_insn); -} - -/* - * Fills the required fields for op_loc depending on if it - * is a source or target. - * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT - * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT - * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT - */ -void get_powerpc_regs(u32 raw_insn, int is_source, - struct annotated_op_loc *op_loc) -{ - if (is_source) - op_loc->reg1 = get_source_reg(raw_insn); - else - op_loc->reg1 = get_target_reg(raw_insn); - - if (op_loc->multi_regs) - op_loc->reg2 = PPC_RB(raw_insn); - - /* TODO: Implement offset handling for X Form */ - if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) - op_loc->offset = get_offset_opcode(raw_insn); -} diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6b00efd53638..c7df534dbf8f 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -10,9 +10,21 @@ #include "utils_header.h" #include "metricgroup.h" #include <api/fs/fs.h> +#include <sys/auxv.h> + +static bool is_compat_mode(void) +{ + u64 base_platform = getauxval(AT_BASE_PLATFORM); + u64 platform = getauxval(AT_PLATFORM); + + if (!strcmp((char *)platform, (char *)base_platform)) + return false; + + return true; +} int -get_cpuid(char *buffer, size_t sz) +get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { unsigned long pvr; int nb; @@ -30,11 +42,29 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +get_cpuid_str(struct perf_cpu cpu __maybe_unused) { char *bufp; + unsigned long pvr; + + /* + * IBM Power System supports compatible mode. That is + * Nth generation platform can support previous generation + * OS in a mode called compatibile mode. For ex. LPAR can be + * booted in a Power9 mode when the system is a Power10. + * + * In the compatible mode, care must be taken when generating + * PVR value. When read, PVR will be of the AT_BASE_PLATFORM + * To support generic events, return 0x00ffffff as pvr when + * booted in compat mode. Based on this pvr value, json will + * pick events from pmu-events/arch/powerpc/compat + */ + if (!is_compat_mode()) + pvr = mfspr(SPRN_PVR); + else + pvr = 0x00ffffff; - if (asprintf(&bufp, "0x%.8lx", mfspr(SPRN_PVR)) < 0) + if (asprintf(&bufp, "0x%.8lx", pvr) < 0) bufp = NULL; return bufp; diff --git a/tools/perf/arch/riscv/Makefile b/tools/perf/arch/riscv/Makefile index 90c3c476a242..18ad078000e2 100644 --- a/tools/perf/arch/riscv/Makefile +++ b/tools/perf/arch/riscv/Makefile @@ -1,6 +1,25 @@ -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +# SPDX-License-Identifier: GPL-2.0 PERF_HAVE_JITDUMP := 1 HAVE_KVM_STAT_SUPPORT := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/riscv/include/generated/asm +header := $(out)/syscalls.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/riscv/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/riscv/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +$(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, riscv) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..c59f5e852b97 --- /dev/null +++ b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl @@ -0,0 +1,47 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> +# Changed by: Kim Phillips <kim.phillips@arm.com> +# Changed by: Björn Töpel <bjorn@rivosinc.com> + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_sc_table() +{ + local sc nr max_nr + + while read sc nr; do + printf "%s\n" " [$nr] = \"$sc\"," + max_nr=$nr + done + + echo "#define SYSCALLTBL_RISCV_MAX_ID $max_nr" +} + +create_table() +{ + echo "#include \"$input\"" + echo "static const char *const syscalltbl_riscv[] = {" + create_sc_table + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |awk '$2 ~ "__NR" && $3 !~ "__NR3264_" { + sub("^#define __NR(3264)?_", ""); + print | "sort -k2 -n"}' \ + |create_table diff --git a/tools/perf/arch/riscv/include/dwarf-regs-table.h b/tools/perf/arch/riscv/include/dwarf-regs-table.h new file mode 100644 index 000000000000..a45b63a6d5a8 --- /dev/null +++ b/tools/perf/arch/riscv/include/dwarf-regs-table.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg + +static const char * const riscv_regstr_tbl[] = { + REG_DWARFNUM_NAME("%zero", 0), + REG_DWARFNUM_NAME("%ra", 1), + REG_DWARFNUM_NAME("%sp", 2), + REG_DWARFNUM_NAME("%gp", 3), + REG_DWARFNUM_NAME("%tp", 4), + REG_DWARFNUM_NAME("%t0", 5), + REG_DWARFNUM_NAME("%t1", 6), + REG_DWARFNUM_NAME("%t2", 7), + REG_DWARFNUM_NAME("%s0", 8), + REG_DWARFNUM_NAME("%s1", 9), + REG_DWARFNUM_NAME("%a0", 10), + REG_DWARFNUM_NAME("%a1", 11), + REG_DWARFNUM_NAME("%a2", 12), + REG_DWARFNUM_NAME("%a3", 13), + REG_DWARFNUM_NAME("%a4", 14), + REG_DWARFNUM_NAME("%a5", 15), + REG_DWARFNUM_NAME("%a6", 16), + REG_DWARFNUM_NAME("%a7", 17), + REG_DWARFNUM_NAME("%s2", 18), + REG_DWARFNUM_NAME("%s3", 19), + REG_DWARFNUM_NAME("%s4", 20), + REG_DWARFNUM_NAME("%s5", 21), + REG_DWARFNUM_NAME("%s6", 22), + REG_DWARFNUM_NAME("%s7", 23), + REG_DWARFNUM_NAME("%s8", 24), + REG_DWARFNUM_NAME("%s9", 25), + REG_DWARFNUM_NAME("%s10", 26), + REG_DWARFNUM_NAME("%s11", 27), + REG_DWARFNUM_NAME("%t3", 28), + REG_DWARFNUM_NAME("%t4", 29), + REG_DWARFNUM_NAME("%t5", 30), + REG_DWARFNUM_NAME("%t6", 31), +}; + +#endif diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index f865cb0489ec..58a672246024 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -2,5 +2,4 @@ perf-util-y += perf_regs.o perf-util-y += header.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/dwarf-regs.c b/tools/perf/arch/riscv/util/dwarf-regs.c deleted file mode 100644 index cd0504c02e2e..000000000000 --- a/tools/perf/arch/riscv/util/dwarf-regs.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. - * Mapping of DWARF debug register numbers into register names. - */ - -#include <stddef.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} - -struct pt_regs_dwarfnum riscv_dwarf_regs_table[] = { - REG_DWARFNUM_NAME("%zero", 0), - REG_DWARFNUM_NAME("%ra", 1), - REG_DWARFNUM_NAME("%sp", 2), - REG_DWARFNUM_NAME("%gp", 3), - REG_DWARFNUM_NAME("%tp", 4), - REG_DWARFNUM_NAME("%t0", 5), - REG_DWARFNUM_NAME("%t1", 6), - REG_DWARFNUM_NAME("%t2", 7), - REG_DWARFNUM_NAME("%s0", 8), - REG_DWARFNUM_NAME("%s1", 9), - REG_DWARFNUM_NAME("%a0", 10), - REG_DWARFNUM_NAME("%a1", 11), - REG_DWARFNUM_NAME("%a2", 12), - REG_DWARFNUM_NAME("%a3", 13), - REG_DWARFNUM_NAME("%a4", 14), - REG_DWARFNUM_NAME("%a5", 15), - REG_DWARFNUM_NAME("%a6", 16), - REG_DWARFNUM_NAME("%a7", 17), - REG_DWARFNUM_NAME("%s2", 18), - REG_DWARFNUM_NAME("%s3", 19), - REG_DWARFNUM_NAME("%s4", 20), - REG_DWARFNUM_NAME("%s5", 21), - REG_DWARFNUM_NAME("%s6", 22), - REG_DWARFNUM_NAME("%s7", 23), - REG_DWARFNUM_NAME("%s8", 24), - REG_DWARFNUM_NAME("%s9", 25), - REG_DWARFNUM_NAME("%s10", 26), - REG_DWARFNUM_NAME("%s11", 27), - REG_DWARFNUM_NAME("%t3", 28), - REG_DWARFNUM_NAME("%t4", 29), - REG_DWARFNUM_NAME("%t5", 30), - REG_DWARFNUM_NAME("%t6", 31), - REG_DWARFNUM_END, -}; - -#define RISCV_MAX_REGS ((sizeof(riscv_dwarf_regs_table) / \ - sizeof(riscv_dwarf_regs_table[0])) - 1) - -const char *get_arch_regstr(unsigned int n) -{ - return (n < RISCV_MAX_REGS) ? riscv_dwarf_regs_table[n].name : NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = riscv_dwarf_regs_table; roff->name; roff++) - if (!strcmp(roff->name, name)) - return roff->dwarfnum; - return -EINVAL; -} diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c index 1b29030021ee..4b839203d4a5 100644 --- a/tools/perf/arch/riscv/util/header.c +++ b/tools/perf/arch/riscv/util/header.c @@ -81,7 +81,7 @@ free: return cpuid; } -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { char *cpuid = _get_cpuid(); int ret = 0; @@ -98,7 +98,7 @@ free: } char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return _get_cpuid(); } diff --git a/tools/perf/arch/riscv64/annotate/instructions.c b/tools/perf/arch/riscv64/annotate/instructions.c index 869a0eb28953..55cf911633f8 100644 --- a/tools/perf/arch/riscv64/annotate/instructions.c +++ b/tools/perf/arch/riscv64/annotate/instructions.c @@ -28,6 +28,8 @@ int riscv64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = riscv64__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_RISCV; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 56994e63b43a..c431c21b11ef 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 # diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index eeac25cca699..c61193f1e096 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -166,6 +166,8 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) if (s390__cpuid_parse(arch, cpuid)) err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; } + arch->e_machine = EM_S390; + arch->e_flags = 0; } return err; diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 01071182763e..e9115b4d8b63 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -465,3 +465,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal sys_mseal +463 common setxattrat sys_setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat sys_removexattrat diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 1ac830030ff3..736c0ad09194 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -2,7 +2,6 @@ perf-util-y += header.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-y += machine.o diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c deleted file mode 100644 index dfddb3099bfa..000000000000 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright IBM Corp. 2010, 2017 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - */ - -#include <errno.h> -#include <stddef.h> -#include <stdlib.h> -#include <linux/kernel.h> -#include <asm/ptrace.h> -#include <string.h> -#include <dwarf-regs.h> -#include "dwarf-regs-table.h" - -const char *get_arch_regstr(unsigned int n) -{ - return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n]; -} - -/* - * Convert the register name into an offset to struct pt_regs (kernel). - * This is required by the BPF prologue generator. The BPF - * program is called in the BPF overflow handler in the perf - * core. - */ -int regs_query_register_offset(const char *name) -{ - unsigned long gpr; - - if (!name || strncmp(name, "%r", 2)) - return -EINVAL; - - errno = 0; - gpr = strtoul(name + 2, NULL, 10); - if (errno || gpr >= 16) - return -EINVAL; - - return offsetof(user_pt_regs, gprs) + 8 * gpr; -} diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 7933f6871c81..db54677a17d2 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -27,7 +27,7 @@ #define SYSINFO "/proc/sysinfo" #define SRVLVL "/proc/service_levels" -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { char *cp, *line = NULL, *line2; char type[8], model[33], version[8], manufacturer[32], authorization[8]; @@ -137,11 +137,11 @@ skip_sysinfo: return (nbytes >= sz) ? ENOBUFS : 0; } -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu) { char *buf = malloc(128); - if (buf && get_cpuid(buf, 128)) + if (buf && get_cpuid(buf, 128, cpu)) zfree(&buf); return buf; } diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/sh/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/sh/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build deleted file mode 100644 index 32f44fc4ab98..000000000000 --- a/tools/perf/arch/sh/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c deleted file mode 100644 index 4b17fc86c73b..000000000000 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ /dev/null @@ -1,41 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org> - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -/* - * Generic dwarf analysis helpers - */ - -#define SH_MAX_REGS 18 -const char *sh_regs_table[SH_MAX_REGS] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "pc", - "pr", -}; - -/* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; -} diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/sparc/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 4031db72ba71..8b59ce8efb89 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,6 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c index 2614c010c235..68c31580ccfc 100644 --- a/tools/perf/arch/sparc/annotate/instructions.c +++ b/tools/perf/arch/sparc/annotate/instructions.c @@ -163,6 +163,8 @@ static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->associate_instruction_ops = sparc__associate_instruction_ops; arch->objdump.comment_char = '#'; + arch->e_machine = EM_SPARC; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build deleted file mode 100644 index 32f44fc4ab98..000000000000 --- a/tools/perf/arch/sparc/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c deleted file mode 100644 index 1282cb2dc7bd..000000000000 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 David S. Miller <davem@davemloft.net> - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -#define SPARC_MAX_REGS 96 - -const char *sparc_regs_table[SPARC_MAX_REGS] = { - "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", - "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", - "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", - "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", - "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", - "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", - "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", - "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", - "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39", - "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47", - "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55", - "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63", -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; -} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 67b4969a6738..a6b6e0a9308a 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 ### diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5caf5a17f03d..ae94b1f0b9cc 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -202,12 +202,13 @@ static int x86__annotate_init(struct arch *arch, char *cpuid) if (x86__cpuid_parse(arch, cpuid)) err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; } - + arch->e_machine = EM_X86_64; + arch->e_flags = 0; arch->initialized = true; return err; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static void update_insn_state_x86(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 534c74b14fab..4d0fb2fba7e2 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -468,3 +468,7 @@ 460 i386 lsm_set_self_attr sys_lsm_set_self_attr 461 i386 lsm_list_modules sys_lsm_list_modules 462 i386 mseal sys_mseal +463 i386 setxattrat sys_setxattrat +464 i386 getxattrat sys_getxattrat +465 i386 listxattrat sys_listxattrat +466 i386 removexattrat sys_removexattrat diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 7093ee21c0d1..5eb708bff1c7 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -386,6 +386,10 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c deleted file mode 100644 index 360a082fc928..000000000000 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "tests/tests.h" -#include "cloexec.h" -#include "debug.h" -#include "evlist.h" -#include "evsel.h" -#include "arch-tests.h" -#include <internal/lib.h> // page_size - -#include <signal.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <errno.h> -#include <string.h> - -static pid_t spawn(void) -{ - pid_t pid; - - pid = fork(); - if (pid) - return pid; - - while(1) - sleep(5); - return 0; -} - -/* - * Create an event group that contains both a sampled hardware - * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then - * wait for the hardware perf counter to overflow and generate a PMI, - * which triggers an event read for both of the events in the group. - * - * Since reading Intel CQM event counters requires sending SMP IPIs, the - * CQM pmu needs to handle the above situation gracefully, and return - * the last read counter value to avoid triggering a WARN_ON_ONCE() in - * smp_call_function_many() caused by sending IPIs from NMI context. - */ -int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct evlist *evlist = NULL; - struct evsel *evsel = NULL; - struct perf_event_attr pe; - int i, fd[2], flag, ret; - size_t mmap_len; - void *event; - pid_t pid; - int err = TEST_FAIL; - - flag = perf_event_open_cloexec_flag(); - - evlist = evlist__new(); - if (!evlist) { - pr_debug("evlist__new failed\n"); - return TEST_FAIL; - } - - ret = parse_event(evlist, "intel_cqm/llc_occupancy/"); - if (ret) { - pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n"); - err = TEST_SKIP; - goto out; - } - - evsel = evlist__first(evlist); - if (!evsel) { - pr_debug("evlist__first failed\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = PERF_TYPE_HARDWARE; - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe.read_format = PERF_FORMAT_GROUP; - - pe.sample_period = 128; - pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; - - pid = spawn(); - - fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); - if (fd[0] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = evsel->attr.type; - pe.config = evsel->attr.config; - - fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); - if (fd[1] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - /* - * Pick a power-of-two number of pages + 1 for the meta-data - * page (struct perf_event_mmap_page). See tools/perf/design.txt. - */ - mmap_len = page_size * 65; - - event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); - if (event == (void *)(-1)) { - pr_debug("failed to mmap %d\n", errno); - goto out; - } - - sleep(1); - - err = TEST_OK; - - munmap(event, mmap_len); - - for (i = 0; i < 2; i++) - close(fd[i]); - - kill(pid, SIGKILL); - wait(NULL); -out: - evlist__delete(evlist); - return err; -} diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c index 09d61fa736e3..b217ed67cd4e 100644 --- a/tools/perf/arch/x86/tests/intel-pt-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -375,7 +375,7 @@ static int get_pt_caps(int cpu, struct pt_caps *caps) return 0; } -static bool is_hydrid(void) +static bool is_hybrid(void) { unsigned int eax, ebx, ecx, edx = 0; bool result; @@ -441,7 +441,7 @@ int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest) int ret = TEST_OK; int cpu; - if (!is_hydrid()) { + if (!is_hybrid()) { test->test_cases[subtest].skip_reason = "not hybrid"; return TEST_SKIP; } diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 2607ed5c4296..848327378694 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -10,10 +10,6 @@ perf-util-y += evlist.o perf-util-y += mem-events.o perf-util-y += evsel.o perf-util-y += iostat.o -perf-util-y += env.o - -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 354780ff1605..ecbf61a7eb3a 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -55,11 +55,12 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, int *err) { char buffer[64]; + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); int ret; *err = 0; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) { *err = ret; return NULL; diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c deleted file mode 100644 index 399c4a0a29d8..000000000000 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * Extracted from probe-finder.c - * - * Written by Masami Hiramatsu <mhiramat@redhat.com> - */ - -#include <stddef.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <linux/ptrace.h> /* for struct pt_regs */ -#include <linux/kernel.h> /* for offsetof */ -#include <dwarf-regs.h> - -/* - * See arch/x86/kernel/ptrace.c. - * Different from it: - * - * - Since struct pt_regs is defined differently for user and kernel, - * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct - * field name of user's pt_regs), we make REG_OFFSET_NAME to accept - * both string name and reg field name. - * - * - Since accessing x86_32's pt_regs from x86_64 building is difficult - * and vise versa, we simply fill offset with -1, so - * get_arch_regstr() still works but regs_query_register_offset() - * returns error. - * The only inconvenience caused by it now is that we are not allowed - * to generate BPF prologue for a x86_64 kernel if perf is built for - * x86_32. This is really a rare usecase. - * - * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use - * the order defined by dwarf. - */ - -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -#ifdef __x86_64__ -# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} -# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} -#else -# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} -# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} -#endif - -/* TODO: switching by dwarf address size */ -#ifndef __x86_64__ -static const struct pt_regs_offset x86_32_regoffset_table[] = { - REG_OFFSET_NAME_32("%ax", eax), - REG_OFFSET_NAME_32("%cx", ecx), - REG_OFFSET_NAME_32("%dx", edx), - REG_OFFSET_NAME_32("%bx", ebx), - REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ - REG_OFFSET_NAME_32("%bp", ebp), - REG_OFFSET_NAME_32("%si", esi), - REG_OFFSET_NAME_32("%di", edi), - REG_OFFSET_END, -}; - -#define regoffset_table x86_32_regoffset_table -#else -static const struct pt_regs_offset x86_64_regoffset_table[] = { - REG_OFFSET_NAME_64("%ax", rax), - REG_OFFSET_NAME_64("%dx", rdx), - REG_OFFSET_NAME_64("%cx", rcx), - REG_OFFSET_NAME_64("%bx", rbx), - REG_OFFSET_NAME_64("%si", rsi), - REG_OFFSET_NAME_64("%di", rdi), - REG_OFFSET_NAME_64("%bp", rbp), - REG_OFFSET_NAME_64("%sp", rsp), - REG_OFFSET_NAME_64("%r8", r8), - REG_OFFSET_NAME_64("%r9", r9), - REG_OFFSET_NAME_64("%r10", r10), - REG_OFFSET_NAME_64("%r11", r11), - REG_OFFSET_NAME_64("%r12", r12), - REG_OFFSET_NAME_64("%r13", r13), - REG_OFFSET_NAME_64("%r14", r14), - REG_OFFSET_NAME_64("%r15", r15), - REG_OFFSET_END, -}; - -#define regoffset_table x86_64_regoffset_table -#endif - -/* Minus 1 for the ending REG_OFFSET_END */ -#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) - -/* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; -} - -/* Reuse code from arch/x86/kernel/ptrace.c */ -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -struct dwarf_regs_idx { - const char *name; - int idx; -}; - -static const struct dwarf_regs_idx x86_regidx_table[] = { - { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, - { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, - { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, - { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, - { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, - { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, - { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, - { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, - { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, - { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, - { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, - { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, - { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, - { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, - { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, - { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, - { "rip", DWARF_REG_PC }, -}; - -int get_arch_regnum(const char *name) -{ - unsigned int i; - - if (*name != '%') - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) - if (!strcmp(x86_regidx_table[i].name, name + 1)) - return x86_regidx_table[i].idx; - return -ENOENT; -} diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c deleted file mode 100644 index 3e537ffb1353..000000000000 --- a/tools/perf/arch/x86/util/env.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "linux/string.h" -#include "util/env.h" -#include "env.h" - -bool x86__is_amd_cpu(void) -{ - struct perf_env env = { .total_mem = 0, }; - static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ - - if (is_amd) - goto ret; - - perf_env__cpuid(&env); - is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1; - perf_env__exit(&env); -ret: - return is_amd >= 1 ? true : false; -} diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h deleted file mode 100644 index d78f080b6b3f..000000000000 --- a/tools/perf/arch/x86/util/env.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _X86_ENV_H -#define _X86_ENV_H - -bool x86__is_amd_cpu(void); - -#endif /* _X86_ENV_H */ diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index cebdd483149e..447a734e591c 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -1,91 +1,86 @@ // SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include "util/pmu.h" -#include "util/pmus.h" -#include "util/evlist.h" -#include "util/parse-events.h" -#include "util/event.h" +#include <string.h> +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" #include "topdown.h" #include "evsel.h" -static int ___evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - LIST_HEAD(head); - size_t i = 0; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - if (perf_pmus__num_core_pmus() == 1) - return evlist__add_attrs(evlist, attrs, nr_attrs); - - for (i = 0; i < nr_attrs; i++) { - struct perf_pmu *pmu = NULL; - - if (attrs[i].type == PERF_TYPE_SOFTWARE) { - struct evsel *evsel = evsel__new(attrs + i); - - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - continue; - } - - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - struct perf_cpu_map *cpus; - struct evsel *evsel; - - evsel = evsel__new(attrs + i); - if (evsel == NULL) - goto out_delete_partial_list; - evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; - cpus = perf_cpu_map__get(pmu->cpus); - evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->pmu_name = strdup(pmu->name); - list_add_tail(&evsel->core.node, &head); - } - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - { - struct evsel *evsel, *n; - - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - } - return -1; -} - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return ___evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) { + /* + * Currently the following topdown events sequence are supported to + * move and regroup correctly. + * + * a. all events in a group + * perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 15,066,240 slots + * 1,899,760 instructions + * 2,126,998 topdown-retiring + * b. all events not in a group + * perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 2,045,561 instructions + * 17,108,370 slots + * 2,281,116 topdown-retiring + * c. slots event in a group but topdown metrics events outside the group + * perf stat -e "{instructions,slots},topdown-retiring" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 20,323,878 slots + * 2,634,884 instructions + * 3,028,656 topdown-retiring + * d. slots event and topdown metrics events in two groups + * perf stat -e "{instructions,slots},{topdown-retiring}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 26,319,024 slots + * 2,427,791 instructions + * 2,683,508 topdown-retiring + * + * If slots event and topdown metrics events are not in same group, the + * topdown metrics events must be first event after the slots event group, + * otherwise topdown metrics events can't be regrouped correctly, e.g. + * + * a. perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 17,923,134 slots + * 2,154,855 instructions + * 3,015,058 cycles + * <not supported> topdown-retiring + * + * If slots event and topdown metrics events are in two groups, the group which + * has topdown metrics events must contain only the topdown metrics event, + * otherwise topdown metrics event can't be regrouped correctly as well, e.g. + * + * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Error: + * The sys_perf_event_open() syscall returned with 22 (Invalid argument) for + * event (topdown-retiring) + */ if (topdown_sys_has_perf_metrics() && (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) { /* Ensure the topdown slots comes first. */ - if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots")) + if (arch_is_topdown_slots(lhs)) return -1; - if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots")) + if (arch_is_topdown_slots(rhs)) return 1; - /* Followed by topdown events. */ - if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown")) + + /* + * Move topdown metrics events forward only when topdown metrics + * events are not in same group with previous slots event. If + * topdown metrics events are already in same group with slots + * event, do nothing. + */ + if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs) && + lhs->core.leader != rhs->core.leader) return -1; - if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown")) + if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs) && + lhs->core.leader != rhs->core.leader) return 1; } diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 090d0f371891..3dd29ba2c23b 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -6,6 +6,7 @@ #include "util/pmu.h" #include "util/pmus.h" #include "linux/string.h" +#include "topdown.h" #include "evsel.h" #include "util/debug.h" #include "env.h" @@ -21,7 +22,8 @@ void arch_evsel__set_sample_weight(struct evsel *evsel) /* Check whether the evsel's PMU supports the perf metrics */ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) { - const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu"; + struct perf_pmu *pmu; + u32 type = evsel->core.attr.type; /* * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU @@ -31,11 +33,31 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) * Checking both the PERF_TYPE_RAW type and the slots event * should be good enough to detect the perf metrics feature. */ - if ((evsel->core.attr.type == PERF_TYPE_RAW) && - perf_pmus__have_event(pmu_name, "slots")) - return true; +again: + switch (type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + if (type) + goto again; + break; + case PERF_TYPE_RAW: + break; + default: + return false; + } + + pmu = evsel->pmu; + if (pmu && perf_pmu__is_fake(pmu)) + pmu = NULL; - return false; + if (!pmu) { + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->type == PERF_TYPE_RAW) + break; + } + } + return pmu && perf_pmu__have_event(pmu, "slots"); } bool arch_evsel__must_be_in_group(const struct evsel *evsel) @@ -44,7 +66,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel) strcasestr(evsel->name, "uops_retired.slots")) return false; - return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots"); + return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel); } int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) @@ -63,7 +85,7 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) return scnprintf(bf, size, "%s", event_name); return scnprintf(bf, size, "%s/%s/", - evsel->pmu_name ? evsel->pmu_name : "cpu", + evsel->pmu ? evsel->pmu->name : "cpu", event_name); } @@ -108,7 +130,7 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) return 0; if (!evsel->core.attr.precise_ip && - !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3))) + !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3))) return 0; /* More verbose IBS errors. */ diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index a51444a77a5f..412977f8aa83 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -58,13 +58,12 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) } int -get_cpuid(char *buffer, size_t sz) +get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { return __get_cpuid(buffer, sz, "%s,%u,%u,%u$"); } -char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu __maybe_unused) { char *buf = malloc(128); diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index ea510a7486b1..8f235d8b67b6 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -75,7 +75,8 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu, goto out_free; attr.config = *config; - err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*err=*/NULL); + err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*apply_hardcoded=*/false, + /*err=*/NULL); if (err) goto out_free; diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index df7b5dfcc26a..366b44d0bb7e 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -444,7 +444,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, iostat_value = (count->val - prev_count_val) / ((double) count->run / count->ena); } - out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric, + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.0f", iostat_metric, iostat_value / (256 * 1024)); } diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index c3d89d6ba1bf..e0060dac2a9f 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -16,7 +16,7 @@ #include "../../../util/fncache.h" #include "../../../util/pmus.h" #include "mem-events.h" -#include "env.h" +#include "util/env.h" void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index 3f9a267d4501..f63747d0abdf 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "api/fs/fs.h" #include "util/evsel.h" +#include "util/evlist.h" #include "util/pmu.h" #include "util/pmus.h" #include "util/topdown.h" @@ -32,6 +33,31 @@ bool topdown_sys_has_perf_metrics(void) } #define TOPDOWN_SLOTS 0x0400 +bool arch_is_topdown_slots(const struct evsel *evsel) +{ + if (evsel->core.attr.config == TOPDOWN_SLOTS) + return true; + + return false; +} + +bool arch_is_topdown_metrics(const struct evsel *evsel) +{ + int config = evsel->core.attr.config; + const char *name_from_config; + struct perf_pmu *pmu; + + /* All topdown events have an event code of 0. */ + if ((config & 0xFF) != 0) + return false; + + pmu = evsel__find_pmu(evsel); + if (!pmu || !pmu->is_core) + return false; + + name_from_config = perf_pmu__name_from_config(pmu, config); + return name_from_config && strcasestr(name_from_config, "topdown"); +} /* * Check whether a topdown group supports sample-read. @@ -41,11 +67,24 @@ bool topdown_sys_has_perf_metrics(void) */ bool arch_topdown_sample_read(struct evsel *leader) { + struct evsel *evsel; + if (!evsel__sys_has_perf_metrics(leader)) return false; - if (leader->core.attr.config == TOPDOWN_SLOTS) - return true; + if (!arch_is_topdown_slots(leader)) + return false; + + /* + * If slots event as leader event but no topdown metric events + * in group, slots event should still sample as leader. + */ + evlist__for_each_entry(leader->evlist, evsel) { + if (evsel->core.leader != leader->core.leader) + return false; + if (evsel != leader && arch_is_topdown_metrics(evsel)) + return true; + } return false; } diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h index 46bf9273e572..1bae9b1822d7 100644 --- a/tools/perf/arch/x86/util/topdown.h +++ b/tools/perf/arch/x86/util/topdown.h @@ -3,5 +3,7 @@ #define _TOPDOWN_H 1 bool topdown_sys_has_perf_metrics(void); +bool arch_is_topdown_slots(const struct evsel *evsel); +bool arch_is_topdown_metrics(const struct evsel *evsel); #endif diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index e2d6cfe21057..3a439e4b12d2 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -24,9 +24,9 @@ u64 rdtsc(void) * ... * will return 3000000000. */ -static double cpuinfo_tsc_freq(void) +static u64 cpuinfo_tsc_freq(void) { - double result = 0; + u64 result = 0; FILE *cpuinfo; char *line = NULL; size_t len = 0; @@ -34,20 +34,22 @@ static double cpuinfo_tsc_freq(void) cpuinfo = fopen("/proc/cpuinfo", "r"); if (!cpuinfo) { pr_err("Failed to read /proc/cpuinfo for TSC frequency\n"); - return NAN; + return 0; } while (getline(&line, &len, cpuinfo) > 0) { if (!strncmp(line, "model name", 10)) { char *pos = strstr(line + 11, " @ "); + double float_result; - if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) { - result *= 1000000000; + if (pos && sscanf(pos, " @ %lfGHz", &float_result) == 1) { + float_result *= 1000000000; + result = (u64)float_result; goto out; } } } out: - if (fpclassify(result) == FP_ZERO) + if (result == 0) pr_err("Failed to find TSC frequency in /proc/cpuinfo\n"); free(line); @@ -55,7 +57,7 @@ out: return result; } -double arch_get_tsc_freq(void) +u64 arch_get_tsc_freq(void) { unsigned int a, b, c, d, lvl; static bool cached; @@ -86,6 +88,6 @@ double arch_get_tsc_freq(void) return tsc; } - tsc = (double)c * (double)b / (double)a; + tsc = (u64)c * (u64)b / (u64)a; return tsc; } diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/xtensa/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/xtensa/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build deleted file mode 100644 index e813e618954b..000000000000 --- a/tools/perf/arch/xtensa/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c deleted file mode 100644 index 12f5457300f5..000000000000 --- a/tools/perf/arch/xtensa/util/dwarf-regs.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (c) 2015 Cadence Design Systems Inc. - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -#define XTENSA_MAX_REGS 16 - -const char *xtensa_regs_table[XTENSA_MAX_REGS] = { - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", -}; - -const char *get_arch_regstr(unsigned int n) -{ - return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; -} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 1fbd7c947abc..19be2aaf4dc0 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -27,6 +27,7 @@ #include <sys/resource.h> #include <sys/wait.h> #include <sys/prctl.h> +#include <sys/stat.h> #include <sys/types.h> #include <linux/kernel.h> #include <linux/time64.h> @@ -35,6 +36,7 @@ #include "../util/header.h" #include "../util/mutex.h" +#include <api/fs/fs.h> #include <numa.h> #include <numaif.h> @@ -533,6 +535,57 @@ static int parse_cpu_list(const char *arg) return 0; } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +static int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. + * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + static int parse_setup_cpu_list(void) { struct thread_data *td; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3af6d3c55aba..e2562677df96 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -23,6 +23,7 @@ #include <errno.h> #include <fcntl.h> #include <assert.h> +#include <sys/epoll.h> #include <sys/time.h> #include <sys/types.h> #include <sys/syscall.h> @@ -34,6 +35,8 @@ struct thread_data { int nr; int pipe_read; int pipe_write; + struct epoll_event epoll_ev; + int epoll_fd; bool cgroup_failed; pthread_t pthread; }; @@ -44,6 +47,7 @@ static int loops = LOOPS_DEFAULT; /* Use processes by default: */ static bool threaded; +static bool nonblocking; static char *cgrp_names[2]; static struct cgroup *cgrps[2]; @@ -81,6 +85,7 @@ out: } static const struct option options[] = { + OPT_BOOLEAN('n', "nonblocking", &nonblocking, "Use non-blocking operations"), OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV", @@ -165,11 +170,25 @@ static void exit_cgroup(int nr) free(cgrp_names[nr]); } +static inline int read_pipe(struct thread_data *td) +{ + int ret, m; +retry: + if (nonblocking) { + ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1); + if (ret < 0) + return ret; + } + ret = read(td->pipe_read, &m, sizeof(int)); + if (nonblocking && ret < 0 && errno == EWOULDBLOCK) + goto retry; + return ret; +} + static void *worker_thread(void *__tdata) { struct thread_data *td = __tdata; - int m = 0, i; - int ret; + int i, ret, m = 0; ret = enter_cgroup(td->nr); if (ret < 0) { @@ -177,16 +196,23 @@ static void *worker_thread(void *__tdata) return NULL; } + if (nonblocking) { + td->epoll_ev.events = EPOLLIN; + td->epoll_fd = epoll_create(1); + BUG_ON(td->epoll_fd < 0); + BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_ev) < 0); + } + for (i = 0; i < loops; i++) { if (!td->nr) { - ret = read(td->pipe_read, &m, sizeof(int)); + ret = read_pipe(td); BUG_ON(ret != sizeof(int)); ret = write(td->pipe_write, &m, sizeof(int)); BUG_ON(ret != sizeof(int)); } else { ret = write(td->pipe_write, &m, sizeof(int)); BUG_ON(ret != sizeof(int)); - ret = read(td->pipe_read, &m, sizeof(int)); + ret = read_pipe(td); BUG_ON(ret != sizeof(int)); } } @@ -209,13 +235,16 @@ int bench_sched_pipe(int argc, const char **argv) * discarding returned value of read(), write() * causes error in building environment for perf */ - int __maybe_unused ret, wait_stat; + int __maybe_unused ret, wait_stat, flags = 0; pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - BUG_ON(pipe(pipe_1)); - BUG_ON(pipe(pipe_2)); + if (nonblocking) + flags |= O_NONBLOCK; + + BUG_ON(pipe2(pipe_1, flags)); + BUG_ON(pipe2(pipe_2, flags)); gettimeofday(&start, NULL); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3dc6197ef3fa..bb87e6e7687d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -840,7 +840,7 @@ int cmd_annotate(int argc, const char **argv) } #endif -#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT +#ifndef HAVE_LIBDW_SUPPORT if (annotate.data_type) { pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); return -ENOTSUP; diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c index 0b76b6e42b78..2346536a5ee1 100644 --- a/tools/perf/builtin-check.c +++ b/tools/perf/builtin-check.c @@ -27,15 +27,15 @@ struct feature_status supported_features[] = { FEATURE_STATUS("bpf", HAVE_LIBBPF_SUPPORT), FEATURE_STATUS("bpf_skeletons", HAVE_BPF_SKEL), FEATURE_STATUS("debuginfod", HAVE_DEBUGINFOD_SUPPORT), - FEATURE_STATUS("dwarf", HAVE_DWARF_SUPPORT), - FEATURE_STATUS("dwarf_getlocations", HAVE_DWARF_GETLOCATIONS_SUPPORT), + FEATURE_STATUS("dwarf", HAVE_LIBDW_SUPPORT), + FEATURE_STATUS("dwarf_getlocations", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT), FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT), FEATURE_STATUS("libaudit", HAVE_LIBAUDIT_SUPPORT), FEATURE_STATUS("libbfd", HAVE_LIBBFD_SUPPORT), FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT), FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT), - FEATURE_STATUS("libdw-dwarf-unwind", HAVE_DWARF_SUPPORT), + FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT), FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT), FEATURE_STATUS("libopencsd", HAVE_CSTRACE_SUPPORT), diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 23326dd20333..82fb7773e03e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -469,13 +469,13 @@ out: static struct perf_diff pdiff; -static struct evsel *evsel_match(struct evsel *evsel, - struct evlist *evlist) +static struct evsel *evsel_match(struct evsel *evsel, struct evlist *evlist) { struct evsel *e; evlist__for_each_entry(evlist, e) { - if (evsel__match2(evsel, e)) + if ((evsel->core.attr.type == e->core.attr.type) && + (evsel->core.attr.config == e->core.attr.config)) return e; } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index abcdc49b7a98..a56cf8b0a7d4 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -815,7 +815,7 @@ static void display_histogram(int buckets[], bool use_nsec) bar_len = buckets[0] * bar_total / total; printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); + 0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); for (i = 1; i < NUM_BUCKET - 1; i++) { int start = (1 << (i - 1)); @@ -1151,8 +1151,9 @@ static int cmp_profile_data(const void *a, const void *b) if (v1 > v2) return -1; - else + if (v1 < v2) return 1; + return 0; } static void print_profile_result(struct perf_ftrace *ftrace) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a756147e2eec..4d8d94146f8d 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -36,7 +36,7 @@ #include <regex.h> #include <linux/ctype.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int kmem_slab; static int kmem_page; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 55ea17c5ff02..274568d712d1 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1226,7 +1226,9 @@ static int cpu_isa_config(struct perf_kvm_stat *kvm) int err; if (kvm->live) { - err = get_cpuid(buf, sizeof(buf)); + struct perf_cpu cpu = {-1}; + + err = get_cpuid(buf, sizeof(buf), cpu); if (err != 0) { pr_err("Failed to look up CPU type: %s\n", str_error_r(err, buf, sizeof(buf))); @@ -2147,6 +2149,7 @@ int cmd_kvm(int argc, const char **argv) "buildid-list", "stat", NULL }; const char *kvm_usage[] = { NULL, NULL }; + exclude_GH_default = true; perf_host = 0; perf_guest = 1; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index c1daf82c9b92..8234410cba4c 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -23,7 +23,7 @@ #include <subcmd/pager.h> #include <subcmd/parse-options.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <errno.h> #include <inttypes.h> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 65b8cba324be..9e7fdfcdd7ff 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,6 +19,7 @@ #include "util/string2.h" #include "util/strlist.h" #include "util/strbuf.h" +#include "util/tool_pmu.h" #include <subcmd/pager.h> #include <subcmd/parse-options.h> #include <linux/zalloc.h> @@ -112,7 +113,7 @@ static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) } } -static void default_print_event(void *ps, const char *pmu_name, const char *topic, +static void default_print_event(void *ps, const char *topic, const char *pmu_name, const char *event_name, const char *event_alias, const char *scale_unit __maybe_unused, bool deprecated, const char *event_type_desc, @@ -353,7 +354,7 @@ static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, .. fputs(buf->buf, fp); } -static void json_print_event(void *ps, const char *pmu_name, const char *topic, +static void json_print_event(void *ps, const char *topic, const char *pmu_name, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char *event_type_desc, @@ -614,9 +615,18 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { + char *old_pmu_glob = default_ps.pmu_glob; + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, ps); + default_ps.pmu_glob = strdup("tool"); + if (!default_ps.pmu_glob) { + ret = -1; + goto out; + } + perf_pmus__print_pmu_events(&print_cb, ps); + zfree(&default_ps.pmu_glob); + default_ps.pmu_glob = old_pmu_glob; } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(&print_cb, ps); @@ -664,7 +674,6 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, ps); print_hwcache_events(&print_cb, ps); perf_pmus__print_pmu_events(&print_cb, ps); print_tracepoint_events(&print_cb, ps); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 003a3bcebfdf..69800e4d9530 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -229,7 +229,7 @@ static int opt_set_target_ns(const struct option *opt __maybe_unused, /* Command option callbacks */ -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int opt_show_lines(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -505,7 +505,7 @@ out: return ret; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #define PROBEDEF_STR \ "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]" #else @@ -521,7 +521,7 @@ __cmd_probe(int argc, const char **argv) "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [<options>] --del '[GROUP:]EVENT' ...", "perf probe --list [GROUP:]EVENT ...", -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT "perf probe [<options>] --line 'LINEDESC'", "perf probe [<options>] --vars 'PROBEPOINT'", #endif @@ -545,7 +545,7 @@ __cmd_probe(int argc, const char **argv) "\t\tFUNC:\tFunction name\n" "\t\tOFF:\tOffset from function entry (in byte)\n" "\t\t%return:\tPut the probe at function return\n" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT "\t\tSRC:\tSource code path\n" "\t\tRL:\tRelative line number from function entry.\n" "\t\tAL:\tAbsolute line number in file.\n" @@ -612,11 +612,11 @@ __cmd_probe(int argc, const char **argv) set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE); #else -# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c) +# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_LIBDW=1", c) set_nobuild('L', "line", false); set_nobuild('V', "vars", false); set_nobuild('\0', "externs", false); @@ -694,7 +694,7 @@ __cmd_probe(int argc, const char **argv) if (ret < 0) pr_err_with_code(" Error: Failed to show functions.", ret); return ret; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT case 'L': ret = show_line_range(¶ms->line_range, params->target, params->nsi, params->uprobes); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adbaf80b398c..f83252472921 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -4157,9 +4157,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->core.nr_entries == 0) { - bool can_profile_kernel = perf_event_paranoid_check(1); - - err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); + err = parse_event(rec->evlist, "cycles:P"); if (err) goto out; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5dc17ffee27a..048c91960ba9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -70,7 +70,7 @@ #include <linux/mman.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct report { @@ -455,7 +455,7 @@ static int report__setup_sample_type(struct report *rep) if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; -#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_LIBDW_SUPPORT) if (dwarf_callchain_users) { ui__warning("Please install libunwind or libdw " "development packages during the perf build.\n"); @@ -1271,6 +1271,10 @@ static int process_attr(const struct perf_tool *tool __maybe_unused, return 0; } +#define CALLCHAIN_BRANCH_SORT_ORDER \ + "srcline,symbol,dso,callchain_branch_predicted," \ + "callchain_branch_abort,callchain_branch_cycles" + int cmd_report(int argc, const char **argv) { struct perf_session *session; @@ -1639,7 +1643,7 @@ repeat: symbol_conf.use_callchain = true; callchain_register_param(&callchain_param); if (sort_order == NULL) - sort_order = "srcline,symbol,dso"; + sort_order = CALLCHAIN_BRANCH_SORT_ORDER; } if (report.mem_mode) { @@ -1701,7 +1705,7 @@ repeat: report.data_type = true; annotate_opts.annotate_src = false; -#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT +#ifndef HAVE_LIBDW_SUPPORT pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); goto error; #endif diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5981cc51abc8..7049c60ebf77 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -68,7 +68,6 @@ struct task_desc { struct sched_atom **atoms; pthread_t thread; - sem_t sleep_sem; sem_t ready_for_work; sem_t work_done_sem; @@ -80,12 +79,10 @@ enum sched_event_type { SCHED_EVENT_RUN, SCHED_EVENT_SLEEP, SCHED_EVENT_WAKEUP, - SCHED_EVENT_MIGRATION, }; struct sched_atom { enum sched_event_type type; - int specific_wait; u64 timestamp; u64 duration; unsigned long nr; @@ -228,6 +225,7 @@ struct perf_sched { bool show_wakeups; bool show_next; bool show_migrations; + bool pre_migrations; bool show_state; bool show_prio; u64 skipped_samples; @@ -247,7 +245,9 @@ struct thread_runtime { u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ u64 dt_delay; /* time between wakeup and sched-in */ + u64 dt_pre_mig; /* time between migration and wakeup */ u64 ready_to_run; /* time of wakeup */ + u64 migrated; /* time when a thread is migrated */ struct stats run_stats; u64 total_run_time; @@ -255,6 +255,7 @@ struct thread_runtime { u64 total_iowait_time; u64 total_preempt_time; u64 total_delay_time; + u64 total_pre_mig_time; char last_state; @@ -421,14 +422,13 @@ static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *t wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); sem_init(wakee_event->wait_sem, 0, 0); - wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; sched->nr_wakeup_events++; } static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, - u64 timestamp, const char task_state __maybe_unused) + u64 timestamp) { struct sched_atom *event = get_new_event(task, timestamp); @@ -468,7 +468,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, * every task starts in sleeping state - this gets ignored * if there's no wakeup pointing to this sleep state: */ - add_sched_event_sleep(sched, task, 0, 0); + add_sched_event_sleep(sched, task, 0); sched->pid_to_task[pid] = task; sched->nr_tasks++; @@ -529,8 +529,6 @@ static void perf_sched__process_event(struct perf_sched *sched, ret = sem_post(atom->wait_sem); BUG_ON(ret); break; - case SCHED_EVENT_MIGRATION: - break; default: BUG_ON(1); } @@ -673,7 +671,6 @@ static void create_tasks(struct perf_sched *sched) parms->task = task = sched->tasks[i]; parms->sched = sched; parms->fd = self_open_counters(sched, i); - sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); task->curr_event = 0; @@ -697,7 +694,6 @@ static void destroy_tasks(struct perf_sched *sched) task = sched->tasks[i]; err = pthread_join(task->thread, NULL); BUG_ON(err); - sem_destroy(&task->sleep_sem); sem_destroy(&task->ready_for_work); sem_destroy(&task->work_done_sem); } @@ -751,7 +747,6 @@ static void wait_for_tasks(struct perf_sched *sched) for (i = 0; i < sched->nr_tasks; i++) { task = sched->tasks[i]; - sem_init(&task->sleep_sem, 0, 0); task->curr_event = 0; } } @@ -852,7 +847,6 @@ static int replay_switch_event(struct perf_sched *sched, *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), next_pid = evsel__intval(evsel, sample, "next_pid"); - const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = sample->cpu; @@ -884,7 +878,7 @@ static int replay_switch_event(struct perf_sched *sched, sched->cpu_last_switched[cpu] = timestamp; add_sched_event_run(sched, prev, timestamp, delta); - add_sched_event_sleep(sched, prev, timestamp, prev_state); + add_sched_event_sleep(sched, prev, timestamp); return 0; } @@ -1749,7 +1743,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } if (sched->map.comp && new_cpu) - color_fprintf(stdout, color, " (CPU %d)", this_cpu); + color_fprintf(stdout, color, " (CPU %d)", this_cpu.cpu); if (proceed != 1) { color_fprintf(stdout, color, "\n"); @@ -2083,14 +2077,15 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - if (sched->show_prio) { - printf(" %-*s %-*s %9s %9s %9s", - comm_width, "task name", MAX_PRIO_STR_LEN, "prio", - "wait time", "sch delay", "run time"); - } else { - printf(" %-*s %9s %9s %9s", comm_width, - "task name", "wait time", "sch delay", "run time"); - } + printf(" %-*s", comm_width, "task name"); + + if (sched->show_prio) + printf(" %-*s", MAX_PRIO_STR_LEN, "prio"); + + printf(" %9s %9s %9s", "wait time", "sch delay", "run time"); + + if (sched->pre_migrations) + printf(" %9s", "pre-mig time"); if (sched->show_state) printf(" %s", "state"); @@ -2105,17 +2100,15 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - if (sched->show_prio) { - printf(" %-*s %-*s %9s %9s %9s", - comm_width, "[tid/pid]", MAX_PRIO_STR_LEN, "", - "(msec)", "(msec)", "(msec)"); - } else { - printf(" %-*s %9s %9s %9s", comm_width, - "[tid/pid]", "(msec)", "(msec)", "(msec)"); - } + printf(" %-*s", comm_width, "[tid/pid]"); - if (sched->show_state) - printf(" %5s", ""); + if (sched->show_prio) + printf(" %-*s", MAX_PRIO_STR_LEN, ""); + + printf(" %9s %9s %9s", "(msec)", "(msec)", "(msec)"); + + if (sched->pre_migrations) + printf(" %9s", "(msec)"); printf("\n"); @@ -2127,15 +2120,15 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - if (sched->show_prio) { - printf(" %.*s %.*s %.9s %.9s %.9s", - comm_width, graph_dotted_line, MAX_PRIO_STR_LEN, graph_dotted_line, - graph_dotted_line, graph_dotted_line, graph_dotted_line); - } else { - printf(" %.*s %.9s %.9s %.9s", comm_width, - graph_dotted_line, graph_dotted_line, graph_dotted_line, - graph_dotted_line); - } + printf(" %.*s", comm_width, graph_dotted_line); + + if (sched->show_prio) + printf(" %.*s", MAX_PRIO_STR_LEN, graph_dotted_line); + + printf(" %.9s %.9s %.9s", graph_dotted_line, graph_dotted_line, graph_dotted_line); + + if (sched->pre_migrations) + printf(" %.9s", graph_dotted_line); if (sched->show_state) printf(" %.5s", graph_dotted_line); @@ -2190,6 +2183,8 @@ static void timehist_print_sample(struct perf_sched *sched, print_sched_time(tr->dt_delay, 6); print_sched_time(tr->dt_run, 6); + if (sched->pre_migrations) + print_sched_time(tr->dt_pre_mig, 6); if (sched->show_state) printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state); @@ -2227,18 +2222,21 @@ out: * last_time = time of last sched change event for current task * (i.e, time process was last scheduled out) * ready_to_run = time of wakeup for current task + * migrated = time of task migration to another CPU * - * -----|------------|------------|------------|------ - * last ready tprev t + * -----|-------------|-------------|-------------|-------------|----- + * last ready migrated tprev t * time to run * - * |-------- dt_wait --------| - * |- dt_delay -|-- dt_run --| + * |---------------- dt_wait ----------------| + * |--------- dt_delay ---------|-- dt_run --| + * |- dt_pre_mig -| * - * dt_run = run time of current task - * dt_wait = time between last schedule out event for task and tprev - * represents time spent off the cpu - * dt_delay = time between wakeup and schedule-in of task + * dt_run = run time of current task + * dt_wait = time between last schedule out event for task and tprev + * represents time spent off the cpu + * dt_delay = time between wakeup and schedule-in of task + * dt_pre_mig = time between wakeup and migration to another CPU */ static void timehist_update_runtime_stats(struct thread_runtime *r, @@ -2249,6 +2247,7 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->dt_iowait = 0; r->dt_preempt = 0; r->dt_run = 0; + r->dt_pre_mig = 0; if (tprev) { r->dt_run = t - tprev; @@ -2257,6 +2256,9 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); else r->dt_delay = tprev - r->ready_to_run; + + if ((r->migrated > r->ready_to_run) && (r->migrated < tprev)) + r->dt_pre_mig = r->migrated - r->ready_to_run; } if (r->last_time > tprev) @@ -2280,6 +2282,7 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_sleep_time += r->dt_sleep; r->total_iowait_time += r->dt_iowait; r->total_preempt_time += r->dt_preempt; + r->total_pre_mig_time += r->dt_pre_mig; } static bool is_idle_sample(struct perf_sample *sample, @@ -2693,9 +2696,13 @@ static int timehist_migrate_task_event(const struct perf_tool *tool, return -1; tr->migrations++; + tr->migrated = sample->time; /* show migrations if requested */ - timehist_print_migration_event(sched, evsel, sample, machine, thread); + if (sched->show_migrations) { + timehist_print_migration_event(sched, evsel, sample, + machine, thread); + } return 0; } @@ -2846,11 +2853,13 @@ out: /* last state is used to determine where to account wait time */ tr->last_state = state; - /* sched out event for task so reset ready to run time */ + /* sched out event for task so reset ready to run time and migrated time */ if (state == 'R') tr->ready_to_run = t; else tr->ready_to_run = 0; + + tr->migrated = 0; } evsel__save_time(evsel, sample->time, sample->cpu); @@ -3290,8 +3299,8 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; } - if (sched->show_migrations && - perf_session__set_tracepoints_handlers(session, migrate_handlers)) + if ((sched->show_migrations || sched->pre_migrations) && + perf_session__set_tracepoints_handlers(session, migrate_handlers)) goto out; /* pre-allocate struct for per-CPU idle stats */ @@ -3833,6 +3842,7 @@ int cmd_sched(int argc, const char **argv) OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"), OPT_STRING(0, "prio", &sched.prio_str, "prio", "analyze events only for given task priority(ies)"), + OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"), OPT_PARENT(sched_options) }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a644787fa9e1..9e47905f75a6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -67,7 +67,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static char const *script_name; @@ -1728,6 +1728,7 @@ static struct { {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"}, {0, NULL} }; @@ -2136,11 +2137,11 @@ struct metric_ctx { }; static void script_print_metric(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color, - const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct metric_ctx *mctx = ctx; + const char *color = metric_threshold_classify__color(thresh); if (!fmt) return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 689a3d43c258..fdf5172646a5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,6 +46,7 @@ #include "util/parse-events.h" #include "util/pmus.h" #include "util/pmu.h" +#include "util/tool_pmu.h" #include "util/event.h" #include "util/evlist.h" #include "util/evsel.h" @@ -294,14 +295,14 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx, int threa * terminates. Use the wait4 values in that case. */ if (err && cpu_map_idx == 0 && - (evsel__tool_event(counter) == PERF_TOOL_USER_TIME || - evsel__tool_event(counter) == PERF_TOOL_SYSTEM_TIME)) { + (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME || + evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) { u64 val, *start_time; struct perf_counts_values *count = perf_counts(counter->counts, cpu_map_idx, thread); start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread); - if (evsel__tool_event(counter) == PERF_TOOL_USER_TIME) + if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME) val = ru_stats.ru_utime_usec_stat.mean; else val = ru_stats.ru_stime_usec_stat.mean; @@ -639,8 +640,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) * (behavior changed with commit b0a873e). */ if (errno == EINVAL || errno == ENOSYS || - errno == ENOENT || errno == EOPNOTSUPP || - errno == ENXIO) { + errno == ENOENT || errno == ENXIO) { if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", evsel__name(counter)); @@ -658,7 +658,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; - } else if (target__has_per_thread(&target) && + } else if (target__has_per_thread(&target) && errno != EOPNOTSUPP && evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) { /* @@ -679,6 +679,19 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) return COUNTER_SKIP; } + if (errno == EOPNOTSUPP) { + if (verbose > 0) { + ui__warning("%s event is not supported by the kernel.\n", + evsel__name(counter)); + } + counter->supported = false; + counter->errored = true; + + if ((evsel__leader(counter) != counter) || + !(counter->core.leader->nr_members > 1)) + return COUNTER_SKIP; + } + evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); @@ -716,15 +729,19 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) { - if (affinity__setup(&saved_affinity) < 0) - return -1; + if (affinity__setup(&saved_affinity) < 0) { + err = -1; + goto err_out; + } affinity = &saved_affinity; } evlist__for_each_entry(evsel_list, counter) { counter->reset_group = false; - if (bpf_counter__load(counter, &target)) - return -1; + if (bpf_counter__load(counter, &target)) { + err = -1; + goto err_out; + } if (!(evsel__is_bperf(counter))) all_counters_use_bpf = false; } @@ -767,7 +784,8 @@ try_again: switch (stat_handle_error(counter)) { case COUNTER_FATAL: - return -1; + err = -1; + goto err_out; case COUNTER_RETRY: goto try_again; case COUNTER_SKIP: @@ -808,7 +826,8 @@ try_again_reset: switch (stat_handle_error(counter)) { case COUNTER_FATAL: - return -1; + err = -1; + goto err_out; case COUNTER_RETRY: goto try_again_reset; case COUNTER_SKIP: @@ -821,6 +840,7 @@ try_again_reset: } } affinity__cleanup(affinity); + affinity = NULL; evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) { @@ -833,8 +853,10 @@ try_again_reset: stat_config.unit_width = l; if (evsel__should_store_id(counter) && - evsel__store_ids(counter, evsel_list)) - return -1; + evsel__store_ids(counter, evsel_list)) { + err = -1; + goto err_out; + } } if (evlist__apply_filters(evsel_list, &counter, &target)) { @@ -855,20 +877,23 @@ try_again_reset: } if (err < 0) - return err; + goto err_out; err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, process_synthesized_event, is_pipe); if (err < 0) - return err; + goto err_out; + } if (target.initial_delay) { pr_info(EVLIST_DISABLED_MSG); } else { err = enable_counters(); - if (err) - return -1; + if (err) { + err = -1; + goto err_out; + } } /* Exec the command, if any */ @@ -878,8 +903,10 @@ try_again_reset: if (target.initial_delay > 0) { usleep(target.initial_delay * USEC_PER_MSEC); err = enable_counters(); - if (err) - return -1; + if (err) { + err = -1; + goto err_out; + } pr_info(EVLIST_ENABLED_MSG); } @@ -899,7 +926,8 @@ try_again_reset: if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); pr_err("Workload failed: %s\n", emsg); - return -1; + err = -1; + goto err_out; } if (WIFSIGNALED(status)) @@ -946,8 +974,23 @@ try_again_reset: evlist__close(evsel_list); return WEXITSTATUS(status); + +err_out: + if (forks) + evlist__cancel_workload(evsel_list); + + affinity__cleanup(affinity); + return err; } +/* + * Returns -1 for fatal errors which signifies to not continue + * when in repeat mode. + * + * Returns < -1 error codes when stat record is used. These + * result in the stat information being displayed, but writing + * to the file fails and is non fatal. + */ static int run_perf_stat(int argc, const char **argv, int run_idx) { int ret; @@ -1814,130 +1857,25 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) } /* - * Add default attributes, if there were no attributes specified or + * Add default events, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ -static int add_default_attributes(void) +static int add_default_events(void) { - struct perf_event_attr default_attrs0[] = { - - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, -}; - struct perf_event_attr frontend_attrs[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, -}; - struct perf_event_attr backend_attrs[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, -}; - struct perf_event_attr default_attrs1[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, - -}; - -/* - * Detailed stats (-d), covering the L1 and last level data caches: - */ - struct perf_event_attr detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_LL << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_LL << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; - -/* - * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: - */ - struct perf_event_attr very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - -}; + const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + struct parse_events_error err; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + int ret = 0; -/* - * Very, very detailed stats (-d -d -d), adding prefetch events: - */ - struct perf_event_attr very_very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; + if (!evlist) + return -ENOMEM; - struct perf_event_attr default_null_attrs[] = {}; - const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + parse_events_error__init(&err); /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) - return 0; + goto out; if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics @@ -1947,9 +1885,10 @@ static int add_default_attributes(void) */ if (!metricgroup__has_metric(pmu, "transaction")) { pr_err("Missing transaction metrics\n"); - return -1; + ret = -1; + goto out; } - return metricgroup__parse_groups(evsel_list, pmu, "transaction", + ret = metricgroup__parse_groups(evlist, pmu, "transaction", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1957,6 +1896,7 @@ static int add_default_attributes(void) stat_config.system_wide, stat_config.hardware_aware_grouping, &stat_config.metric_events); + goto out; } if (smi_cost) { @@ -1964,26 +1904,29 @@ static int add_default_attributes(void) if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { pr_err("freeze_on_smi is not supported.\n"); - return -1; + ret = -1; + goto out; } if (!smi) { if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { - fprintf(stderr, "Failed to set freeze_on_smi.\n"); - return -1; + pr_err("Failed to set freeze_on_smi.\n"); + ret = -1; + goto out; } smi_reset = true; } if (!metricgroup__has_metric(pmu, "smi")) { pr_err("Missing smi metrics\n"); - return -1; + ret = -1; + goto out; } if (!force_metric_only) stat_config.metric_only = true; - return metricgroup__parse_groups(evsel_list, pmu, "smi", + ret = metricgroup__parse_groups(evlist, pmu, "smi", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1991,6 +1934,7 @@ static int add_default_attributes(void) stat_config.system_wide, stat_config.hardware_aware_grouping, &stat_config.metric_events); + goto out; } if (topdown_run) { @@ -2003,21 +1947,23 @@ static int add_default_attributes(void) if (!max_level) { pr_err("Topdown requested but the topdown metric groups aren't present.\n" "(See perf list the metric groups have names like TopdownL1)\n"); - return -1; + ret = -1; + goto out; } if (stat_config.topdown_level > max_level) { pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level); - return -1; - } else if (!stat_config.topdown_level) + ret = -1; + goto out; + } else if (!stat_config.topdown_level) { stat_config.topdown_level = 1; - + } if (!stat_config.interval && !stat_config.metric_only) { fprintf(stat_config.output, "Topdown accuracy may decrease when measuring long periods.\n" "Please print the result regularly, e.g. -I1000\n"); } str[8] = stat_config.topdown_level + '0'; - if (metricgroup__parse_groups(evsel_list, + if (metricgroup__parse_groups(evlist, pmu, str, /*metric_no_group=*/false, /*metric_no_merge=*/false, @@ -2025,41 +1971,49 @@ static int add_default_attributes(void) stat_config.user_requested_cpu_list, stat_config.system_wide, stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) - return -1; + &stat_config.metric_events) < 0) { + ret = -1; + goto out; + } } if (!stat_config.topdown_level) stat_config.topdown_level = 1; - if (!evsel_list->core.nr_entries) { + if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { /* No events so add defaults. */ if (target__has_cpu(&target)) - default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; + ret = parse_events(evlist, "cpu-clock", &err); + else + ret = parse_events(evlist, "task-clock", &err); + if (ret) + goto out; + + ret = parse_events(evlist, + "context-switches," + "cpu-migrations," + "page-faults," + "instructions," + "cycles," + "stalled-cycles-frontend," + "stalled-cycles-backend," + "branches," + "branch-misses", + &err); + if (ret) + goto out; - if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0) - return -1; - if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) { - if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0) - return -1; - } - if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) { - if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0) - return -1; - } - if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) - return -1; /* * Add TopdownL1 metrics if they exist. To minimize * multiplexing, don't request threshold computation. */ if (metricgroup__has_metric(pmu, "Default")) { struct evlist *metric_evlist = evlist__new(); - struct evsel *metric_evsel; - - if (!metric_evlist) - return -1; + if (!metric_evlist) { + ret = -ENOMEM; + goto out; + } if (metricgroup__parse_groups(metric_evlist, pmu, "Default", /*metric_no_group=*/false, /*metric_no_merge=*/false, @@ -2067,43 +2021,71 @@ static int add_default_attributes(void) stat_config.user_requested_cpu_list, stat_config.system_wide, stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) - return -1; - - evlist__for_each_entry(metric_evlist, metric_evsel) { - metric_evsel->skippable = true; - metric_evsel->default_metricgroup = true; + &stat_config.metric_events) < 0) { + ret = -1; + goto out; } - evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries); + + evlist__for_each_entry(metric_evlist, evsel) + evsel->default_metricgroup = true; + + evlist__splice_list_tail(evlist, &metric_evlist->core.entries); evlist__delete(metric_evlist); } - - /* Platform specific attrs */ - if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0) - return -1; } /* Detailed events get appended to the event list: */ - if (detailed_run < 1) - return 0; - - /* Append detailed run extra attributes: */ - if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) - return -1; - - if (detailed_run < 2) - return 0; - - /* Append very detailed run extra attributes: */ - if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) - return -1; - - if (detailed_run < 3) - return 0; - - /* Append very, very detailed run extra attributes: */ - return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); + if (!ret && detailed_run >= 1) { + /* + * Detailed stats (-d), covering the L1 and last level data + * caches: + */ + ret = parse_events(evlist, + "L1-dcache-loads," + "L1-dcache-load-misses," + "LLC-loads," + "LLC-load-misses", + &err); + } + if (!ret && detailed_run >= 2) { + /* + * Very detailed stats (-d -d), covering the instruction cache + * and the TLB caches: + */ + ret = parse_events(evlist, + "L1-icache-loads," + "L1-icache-load-misses," + "dTLB-loads," + "dTLB-load-misses," + "iTLB-loads," + "iTLB-load-misses", + &err); + } + if (!ret && detailed_run >= 3) { + /* + * Very, very detailed stats (-d -d -d), adding prefetch events: + */ + ret = parse_events(evlist, + "L1-dcache-prefetches," + "L1-dcache-prefetch-misses", + &err); + } +out: + if (!ret) { + evlist__for_each_entry(evlist, evsel) { + /* + * Make at least one event non-skippable so fatal errors are visible. + * 'cycles' always used to be default and non-skippable, so use that. + */ + if (strcmp("cycles", evsel__name(evsel))) + evsel->skippable = true; + } + } + parse_events_error__exit(&err); + evlist__splice_list_tail(evsel_list, &evlist->core.entries); + evlist__delete(evlist); + return ret; } static const char * const stat_record_usage[] = { @@ -2591,6 +2573,14 @@ int cmd_stat(int argc, const char **argv) goto out; } + if (stat_config.csv_output || (stat_config.metric_only && stat_config.json_output)) { + /* + * Current CSV and metric-only JSON output doesn't display the + * metric threshold so don't compute it. + */ + stat_config.metric_no_threshold = true; + } + if (stat_config.walltime_run_table && stat_config.run_count <= 1) { fprintf(stderr, "--table is only supported with -r\n"); parse_options_usage(stat_usage, stat_options, "r", 1); @@ -2651,6 +2641,7 @@ int cmd_stat(int argc, const char **argv) } else if (big_num_opt == 0) /* User passed --no-big-num */ stat_config.big_num = false; + target.inherit = !stat_config.no_inherit; err = target__validate(&target); if (err) { target__strerror(&target, err, errbuf, BUFSIZ); @@ -2760,7 +2751,7 @@ int cmd_stat(int argc, const char **argv) } } - if (add_default_attributes()) + if (add_default_events()) goto out; if (stat_config.cgroup_list) { @@ -2879,7 +2870,10 @@ int cmd_stat(int argc, const char **argv) evlist__reset_prev_raw_counts(evsel_list); status = run_perf_stat(argc, argv, run_idx); - if (forever && status != -1 && !interval) { + if (status == -1) + break; + + if (forever && !interval) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 218c8b44d7be..068d297aaf44 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -38,7 +38,7 @@ #include "util/tracepoint.h" #include "util/util.h" #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); @@ -1158,7 +1158,6 @@ static void draw_io_bars(struct timechart *tchart) } svg_box(Y, c->start_time, c->end_time, "process3"); - sample = c->io_samples; for (sample = c->io_samples; sample; sample = sample->next) { double h = (double)sample->bytes / c->max_bytes; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d3f11b90d025..6a1a128fe645 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -88,7 +88,7 @@ #include <perf/mmap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #ifndef O_CLOEXEC @@ -1873,7 +1873,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, switch (event->header.type) { case PERF_RECORD_LOST: color_fprintf(trace->output, PERF_COLOR_RED, - "LOST %" PRIu64 " events!\n", event->lost.lost); + "LOST %" PRIu64 " events!\n", (u64)event->lost.lost); ret = machine__process_lost_event(machine, event, sample); break; default: @@ -2702,6 +2702,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, char msg[1024]; void *args, *augmented_args = NULL; int augmented_args_size; + size_t printed = 0; if (sc == NULL) return -1; @@ -2717,8 +2718,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, args = perf_evsel__sc_tp_ptr(evsel, args, sample); augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size); - syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); - fprintf(trace->output, "%s", msg); + printed += syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); + fprintf(trace->output, "%.*s", (int)printed, msg); err = 0; out_put: thread__put(thread); @@ -3087,7 +3088,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); } - return printed + fprintf(trace->output, "%s", bf); + return printed + fprintf(trace->output, "%.*s", (int)printed, bf); } static int trace__event_handler(struct trace *trace, struct evsel *evsel, @@ -3096,13 +3097,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, { struct thread *thread; int callchain_ret = 0; - /* - * Check if we called perf_evsel__disable(evsel) due to, for instance, - * this event's max_events having been hit and this is an entry coming - * from the ring buffer that we should discard, since the max events - * have already been considered/printed. - */ - if (evsel->disabled) + + if (evsel->nr_events_printed >= evsel->max_events) return 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); @@ -4326,6 +4322,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) sizeof(__u32), BPF_ANY); } } + + if (trace->skel) + trace->filter_pids.map = trace->skel->maps.pids_filtered; #endif err = trace__set_filter_pids(trace); if (err < 0) @@ -5449,6 +5448,10 @@ init_augmented_syscall_tp: if (trace.summary_only) trace.summary = trace.summary_only; + /* Keep exited threads, otherwise information might be lost for summary */ + if (trace.summary) + symbol_conf.keep_exited_threads = true; + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { diff --git a/tools/perf/check-header_ignore_hunks/lib/list_sort.c b/tools/perf/check-header_ignore_hunks/lib/list_sort.c index 32d98cb34f80..b7316d29857d 100644 --- a/tools/perf/check-header_ignore_hunks/lib/list_sort.c +++ b/tools/perf/check-header_ignore_hunks/lib/list_sort.c @@ -1,11 +1,4 @@ -@@ -1,5 +1,6 @@ - // SPDX-License-Identifier: GPL-2.0 - #include <linux/kernel.h> -+#include <linux/bug.h> - #include <linux/compiler.h> - #include <linux/export.h> - #include <linux/string.h> -@@ -52,6 +53,7 @@ +@@ -50,6 +50,7 @@ struct list_head *a, struct list_head *b) { struct list_head *tail = head; @@ -13,7 +6,7 @@ for (;;) { /* if equal, take 'a' -- important for sort stability */ -@@ -77,6 +79,15 @@ +@@ -75,6 +76,15 @@ /* Finish linking remainder of list b on to tail */ tail->next = b; do { diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 4083b1abeaab..4ca2d7b2ea6c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -220,7 +220,7 @@ static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample CHECK_SAMPLE(raw_callchain_nr); CHECK(!sample->raw_callchain); -#define EVENT_NAME "branches:" +#define EVENT_NAME "branches" CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); return 0; diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 32ff619e881c..00d73a16c4fd 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -235,7 +235,7 @@ static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample CHECK_SAMPLE(raw_callchain_nr); CHECK(!sample->raw_callchain); -#define EVENT_NAME "branches:" +#define EVENT_NAME "branches" CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); return 0; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4def800f4089..a2987f2cfe1a 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -542,8 +542,6 @@ int main(int argc, const char **argv) } cmd = argv[0]; - test_attr__init(); - /* * We use PATH to find perf commands, but we prepend some higher * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json new file mode 100644 index 000000000000..74ac12660a29 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json @@ -0,0 +1,9 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx91_ddr.cycles", + "Unit": "imx9_ddr", + "Compat": "imx91" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json new file mode 100644 index 000000000000..f0c5911eb2d0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json @@ -0,0 +1,26 @@ +[ + { + "BriefDescription": "bandwidth usage for lpddr4 evk board", + "MetricName": "imx91_bandwidth_usage.lpddr4", + "MetricExpr": "(((( imx9_ddr0@ddrc_pm_0@ ) * 2 * 8 ) + (( imx9_ddr0@ddrc_pm_3@ + imx9_ddr0@ddrc_pm_5@ + imx9_ddr0@ddrc_pm_7@ + imx9_ddr0@ddrc_pm_9@ - imx9_ddr0@ddrc_pm_2@ - imx9_ddr0@ddrc_pm_4@ - imx9_ddr0@ddrc_pm_6@ - imx9_ddr0@ddrc_pm_8@ ) * 32 )) / duration_time) / (2400 * 1000000 * 2)", + "ScaleUnit": "1e2%", + "Unit": "imx9_ddr", + "Compat": "imx91" + }, + { + "BriefDescription": "bytes all masters read from ddr", + "MetricName": "imx91_ddr_read.all", + "MetricExpr": "( imx9_ddr0@ddrc_pm_0@ ) * 2 * 8", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx9_ddr", + "Compat": "imx91" + }, + { + "BriefDescription": "bytes all masters write to ddr", + "MetricName": "imx91_ddr_write.all", + "MetricExpr": "( imx9_ddr0@ddrc_pm_3@ + imx9_ddr0@ddrc_pm_5@ + imx9_ddr0@ddrc_pm_7@ + imx9_ddr0@ddrc_pm_9@ - imx9_ddr0@ddrc_pm_2@ - imx9_ddr0@ddrc_pm_4@ - imx9_ddr0@ddrc_pm_6@ - imx9_ddr0@ddrc_pm_8@ ) * 32", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx9_ddr", + "Compat": "imx91" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json index 126ce980f6f2..45a0d51dfb63 100644 --- a/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json @@ -8,6 +8,14 @@ "Compat": "imx95" }, { + "BriefDescription": "bandwidth usage for lpddr4x evk board", + "MetricName": "imx95_bandwidth_usage.lpddr4x", + "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4000 * 1000000 * 4)", + "ScaleUnit": "1e2%", + "Unit": "imx9_ddr", + "Compat": "imx95" + }, + { "BriefDescription": "bytes of all masters read from ddr", "MetricName": "imx95_ddr_read.all", "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32", diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index 6463531b9941..b6a0d2de8534 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -3,235 +3,235 @@ "MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)", "PublicDescription": "Frontend bound L1 topdown metric", "BriefDescription": "Frontend bound L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "frontend_bound" }, { "MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)", "PublicDescription": "Bad Speculation L1 topdown metric", "BriefDescription": "Bad Speculation L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "bad_speculation" }, { "MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)", "PublicDescription": "Retiring L1 topdown metric", "BriefDescription": "Retiring L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "retiring" }, { "MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)", "PublicDescription": "Backend Bound L1 topdown metric", "BriefDescription": "Backend Bound L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "backend_bound" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x201d@ / CPU_CYCLES", "PublicDescription": "Fetch latency bound L2 topdown metric", "BriefDescription": "Fetch latency bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "fetch_latency_bound" }, { "MetricExpr": "frontend_bound - fetch_latency_bound", "PublicDescription": "Fetch bandwidth bound L2 topdown metric", "BriefDescription": "Fetch bandwidth bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "fetch_bandwidth_bound" }, { "MetricExpr": "(bad_speculation * BR_MIS_PRED) / (BR_MIS_PRED + armv8_pmuv3_0@event\\=0x2013@)", "PublicDescription": "Branch mispredicts L2 topdown metric", "BriefDescription": "Branch mispredicts L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "branch_mispredicts" }, { "MetricExpr": "bad_speculation - branch_mispredicts", "PublicDescription": "Machine clears L2 topdown metric", "BriefDescription": "Machine clears L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "machine_clears" }, { "MetricExpr": "(EXE_STALL_CYCLE - (MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@)) / CPU_CYCLES", "PublicDescription": "Core bound L2 topdown metric", "BriefDescription": "Core bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "core_bound" }, { "MetricExpr": "(MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@) / CPU_CYCLES", "PublicDescription": "Memory bound L2 topdown metric", "BriefDescription": "Memory bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "memory_bound" }, { "MetricExpr": "(((L2I_TLB - L2I_TLB_REFILL) * 15) + (L2I_TLB_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by itlb miss L3 topdown metric", "BriefDescription": "Idle by itlb miss L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "idle_by_itlb_miss" }, { "MetricExpr": "(((L2I_CACHE - L2I_CACHE_REFILL) * 15) + (L2I_CACHE_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by icache miss L3 topdown metric", "BriefDescription": "Idle by icache miss L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "idle_by_icache_miss" }, { "MetricExpr": "(BR_MIS_PRED * 5) / CPU_CYCLES", "PublicDescription": "BP misp flush L3 topdown metric", "BriefDescription": "BP misp flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "bp_misp_flush" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2013@ * 5) / CPU_CYCLES", "PublicDescription": "OOO flush L3 topdown metric", "BriefDescription": "OOO flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "ooo_flush" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1001@ * 5) / CPU_CYCLES", "PublicDescription": "Static predictor flush L3 topdown metric", "BriefDescription": "Static predictor flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "sp_flush" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x1010@ / BR_MIS_PRED", "PublicDescription": "Indirect branch L3 topdown metric", "BriefDescription": "Indirect branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "indirect_branch" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1013@ + armv8_pmuv3_0@event\\=0x1016@) / BR_MIS_PRED", "PublicDescription": "Push branch L3 topdown metric", "BriefDescription": "Push branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "push_branch" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x100d@ / BR_MIS_PRED", "PublicDescription": "Pop branch L3 topdown metric", "BriefDescription": "Pop branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "pop_branch" }, { "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1013@ - armv8_pmuv3_0@event\\=0x1016@ - armv8_pmuv3_0@event\\=0x100d@) / BR_MIS_PRED", "PublicDescription": "Other branch L3 topdown metric", "BriefDescription": "Other branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "other_branch" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2012@ / armv8_pmuv3_0@event\\=0x2013@", "PublicDescription": "Nuke flush L3 topdown metric", "BriefDescription": "Nuke flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "nuke_flush" }, { "MetricExpr": "1 - nuke_flush", "PublicDescription": "Other flush L3 topdown metric", "BriefDescription": "Other flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "other_flush" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2010@ / CPU_CYCLES", "PublicDescription": "Sync stall L3 topdown metric", "BriefDescription": "Sync stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "sync_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2004@ / CPU_CYCLES", "PublicDescription": "Rob stall L3 topdown metric", "BriefDescription": "Rob stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "rob_stall" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2006@ + armv8_pmuv3_0@event\\=0x2007@ + armv8_pmuv3_0@event\\=0x2008@) / CPU_CYCLES", "PublicDescription": "Ptag stall L3 topdown metric", "BriefDescription": "Ptag stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "ptag_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x201e@ / CPU_CYCLES", "PublicDescription": "SaveOpQ stall L3 topdown metric", "BriefDescription": "SaveOpQ stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "saveopq_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2005@ / CPU_CYCLES", "PublicDescription": "PC buffer stall L3 topdown metric", "BriefDescription": "PC buffer stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "pc_buffer_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7002@ / CPU_CYCLES", "PublicDescription": "Divider L3 topdown metric", "BriefDescription": "Divider L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "divider" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7003@ / CPU_CYCLES", "PublicDescription": "FSU stall L3 topdown metric", "BriefDescription": "FSU stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "fsu_stall" }, { "MetricExpr": "core_bound - divider - fsu_stall", "PublicDescription": "EXE ports util L3 topdown metric", "BriefDescription": "EXE ports util L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "exe_ports_util" }, { "MetricExpr": "(MEM_STALL_ANYLOAD - MEM_STALL_L1MISS) / CPU_CYCLES", "PublicDescription": "L1 bound L3 topdown metric", "BriefDescription": "L1 bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "l1_bound" }, { "MetricExpr": "(MEM_STALL_L1MISS - MEM_STALL_L2MISS) / CPU_CYCLES", "PublicDescription": "L2 bound L3 topdown metric", "BriefDescription": "L2 bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "l2_bound" }, { "MetricExpr": "MEM_STALL_L2MISS / CPU_CYCLES", "PublicDescription": "Mem bound L3 topdown metric", "BriefDescription": "Mem bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "mem_bound" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7005@ / CPU_CYCLES", "PublicDescription": "Store bound L3 topdown metric", "BriefDescription": "Store bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "store_bound" } ] diff --git a/tools/perf/pmu-events/arch/common/common/tool.json b/tools/perf/pmu-events/arch/common/common/tool.json new file mode 100644 index 000000000000..12f2ef1813a6 --- /dev/null +++ b/tools/perf/pmu-events/arch/common/common/tool.json @@ -0,0 +1,74 @@ +[ + { + "Unit": "tool", + "EventName": "duration_time", + "BriefDescription": "Wall clock interval time in nanoseconds", + "ConfigCode": "1" + }, + { + "Unit": "tool", + "EventName": "user_time", + "BriefDescription": "User (non-kernel) time in nanoseconds", + "ConfigCode": "2" + }, + { + "Unit": "tool", + "EventName": "system_time", + "BriefDescription": "System/kernel time in nanoseconds", + "ConfigCode": "3" + }, + { + "Unit": "tool", + "EventName": "has_pmem", + "BriefDescription": "1 if persistent memory installed otherwise 0", + "ConfigCode": "4" + }, + { + "Unit": "tool", + "EventName": "num_cores", + "BriefDescription": "Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU", + "ConfigCode": "5" + }, + { + "Unit": "tool", + "EventName": "num_cpus", + "BriefDescription": "Number of logical Linux CPUs. There may be multiple such CPUs on a core", + "ConfigCode": "6" + }, + { + "Unit": "tool", + "EventName": "num_cpus_online", + "BriefDescription": "Number of online logical Linux CPUs. There may be multiple such CPUs on a core", + "ConfigCode": "7" + }, + { + "Unit": "tool", + "EventName": "num_dies", + "BriefDescription": "Number of dies. Each die has 1 or more cores", + "ConfigCode": "8" + }, + { + "Unit": "tool", + "EventName": "num_packages", + "BriefDescription": "Number of packages. Each package has 1 or more die", + "ConfigCode": "9" + }, + { + "Unit": "tool", + "EventName": "slots", + "BriefDescription": "Number of functional units that in parallel can execute parts of an instruction", + "ConfigCode": "10" + }, + { + "Unit": "tool", + "EventName": "smt_on", + "BriefDescription": "1 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0", + "ConfigCode": "11" + }, + { + "Unit": "tool", + "EventName": "system_tsc_freq", + "BriefDescription": "The amount a Time Stamp Counter (TSC) increases per second", + "ConfigCode": "12" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json b/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json new file mode 100644 index 000000000000..6f5e8efcb098 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json @@ -0,0 +1,117 @@ +[ + { + "EventCode": "0x600F4", + "EventName": "PM_CYC", + "BriefDescription": "Processor cycles." + }, + { + "EventCode": "0x100F2", + "EventName": "PM_CYC_INST_CMPL", + "BriefDescription": "1 or more ppc insts finished" + }, + { + "EventCode": "0x100f4", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operations Finished." + }, + { + "EventCode": "0x100F6", + "EventName": "PM_L1_ITLB_MISS", + "BriefDescription": "Number of I-ERAT reloads." + }, + { + "EventCode": "0x100F8", + "EventName": "PM_NO_INST_AVAIL", + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread." + }, + { + "EventCode": "0x100fc", + "EventName": "PM_LD_CMPL", + "BriefDescription": "Load instruction completed." + }, + { + "EventCode": "0x200F0", + "EventName": "PM_ST_CMPL", + "BriefDescription": "Stores completed from S2Q (2nd-level store queue)." + }, + { + "EventCode": "0x200F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "PowerPC instruction dispatched." + }, + { + "EventCode": "0x200F4", + "EventName": "PM_RUN_CYC", + "BriefDescription": "Processor cycles gated by the run latch." + }, + { + "EventCode": "0x200F6", + "EventName": "PM_L1_DTLB_RELOAD", + "BriefDescription": "DERAT Reloaded due to a DERAT miss." + }, + { + "EventCode": "0x200FA", + "EventName": "PM_BR_TAKEN_CMPL", + "BriefDescription": "Branch Taken instruction completed." + }, + { + "EventCode": "0x200FC", + "EventName": "PM_L1_ICACHE_MISS", + "BriefDescription": "Demand instruction cache miss." + }, + { + "EventCode": "0x200FE", + "EventName": "PM_L1_RELOAD_FROM_MEM", + "BriefDescription": "L1 Dcache reload from memory" + }, + { + "EventCode": "0x300F0", + "EventName": "PM_ST_MISS_L1", + "BriefDescription": "Store Missed L1" + }, + { + "EventCode": "0x300FC", + "EventName": "PM_DTLB_MISS", + "BriefDescription": "Data PTEG reload" + }, + { + "EventCode": "0x300FE", + "EventName": "PM_DATA_FROM_L3MISS", + "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)" + }, + { + "EventCode": "0x400F0", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "L1 Dcache load miss" + }, + { + "EventCode": "0x400F2", + "EventName": "PM_CYC_INST_DISP", + "BriefDescription": "Cycle when instruction(s) dispatched." + }, + { + "EventCode": "0x400F6", + "EventName": "PM_BR_MPRED_CMPL", + "BriefDescription": "A mispredicted branch completed. Includes direction and target." + }, + { + "EventCode": "0x400FA", + "EventName": "PM_RUN_INST_CMPL", + "BriefDescription": "PowerPC instruction completed while the run latch is set." + }, + { + "EventCode": "0x400FC", + "EventName": "PM_ITLB_MISS", + "BriefDescription": "Instruction TLB reload (after a miss), all page sizes. Includes only demand misses." + }, + { + "EventCode": "0x400fe", + "EventName": "PM_LD_NOT_CACHED", + "BriefDescription": "Load data not cached." + }, + { + "EventCode": "0x500fa", + "EventName": "PM_INST_CMPL", + "BriefDescription": "Instructions." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index 4d5e9138d4cc..cbd3cb443784 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -16,3 +16,4 @@ 0x004e[[:xdigit:]]{4},1,power9,core 0x0080[[:xdigit:]]{4},1,power10,core 0x0082[[:xdigit:]]{4},1,power10,core +0x00ffffff,1,compat,core diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json new file mode 100644 index 000000000000..fa06569d881d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json @@ -0,0 +1,1634 @@ +[ + { + "EventName": "local_or_remote_socket_read_data_beats_dram_0", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_1", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_2", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_3", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_4", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_5", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_6", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_7", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_8", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_9", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_10", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_11", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_0", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 0.", + "EventCode": "0xd5f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_1", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 1.", + "EventCode": "0xd9f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_2", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 2.", + "EventCode": "0xddf", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_3", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 3.", + "EventCode": "0xe1f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_4", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 4.", + "EventCode": "0xe5f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_5", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 5.", + "EventCode": "0xe9f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 0.", + "EventCode": "0xd5f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 1.", + "EventCode": "0xd9f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 2.", + "EventCode": "0xddf", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 3.", + "EventCode": "0xe1f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 4.", + "EventCode": "0xe5f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 5.", + "EventCode": "0xe9f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json b/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json index af2fdf1f55d6..ff6627a77805 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json +++ b/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json @@ -97,6 +97,12 @@ "UMask": "0x02" }, { + "EventName": "ls_dmnd_fills_from_sys.local_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_dmnd_fills_from_sys.near_cache", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from cache of another CCX when the address was in the same NUMA node.", @@ -115,12 +121,30 @@ "UMask": "0x10" }, { + "EventName": "ls_dmnd_fills_from_sys.remote_cache", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_dmnd_fills_from_sys.dram_io_far", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_dmnd_fills_from_sys.dram_io_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_dmnd_fills_from_sys.far_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_dmnd_fills_from_sys.alternate_memories", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from extension memory.", @@ -193,12 +217,6 @@ "UMask": "0x50" }, { - "EventName": "ls_any_fills_from_sys.all_dram_io", - "EventCode": "0x44", - "BriefDescription": "Any data cache fills from either DRAM or MMIO in any NUMA node (same or different socket).", - "UMask": "0x48" - }, - { "EventName": "ls_any_fills_from_sys.alternate_memories", "EventCode": "0x44", "BriefDescription": "Any data cache fills from extension memory.", @@ -343,6 +361,12 @@ "UMask": "0x02" }, { + "EventName": "ls_sw_pf_dc_fills.local_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_sw_pf_dc_fills.near_cache", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from cache of another CCX in the same NUMA node.", @@ -361,12 +385,30 @@ "UMask": "0x10" }, { + "EventName": "ls_sw_pf_dc_fills.remote_cache", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_sw_pf_dc_fills.dram_io_far", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_sw_pf_dc_fills.dram_io_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_sw_pf_dc_fills.far_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_sw_pf_dc_fills.alternate_memories", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from extension memory.", @@ -391,6 +433,12 @@ "UMask": "0x02" }, { + "EventName": "ls_hw_pf_dc_fills.local_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_hw_pf_dc_fills.near_cache", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from cache of another CCX when the address was in the same NUMA node.", @@ -409,12 +457,30 @@ "UMask": "0x10" }, { + "EventName": "ls_hw_pf_dc_fills.remote_cache", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_hw_pf_dc_fills.dram_io_far", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_hw_pf_dc_fills.dram_io_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_hw_pf_dc_fills.far_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_hw_pf_dc_fills.alternate_memories", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from extension memory.", diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json index c97874039c1e..635d57e3bc15 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json @@ -341,5 +341,117 @@ "MetricGroup": "memory_controller", "PerPkg": "1", "ScaleUnit": "1per_memclk" + }, + { + "MetricName": "dram_read_bandwidth_for_local_or_remote_socket", + "BriefDescription": "DRAM read data bandwidth for accesses in local or remote socket.", + "MetricExpr": "(local_or_remote_socket_read_data_beats_dram_0 + local_or_remote_socket_read_data_beats_dram_1 + local_or_remote_socket_read_data_beats_dram_2 + local_or_remote_socket_read_data_beats_dram_3 + local_or_remote_socket_read_data_beats_dram_4 + local_or_remote_socket_read_data_beats_dram_5 + local_or_remote_socket_read_data_beats_dram_6 + local_or_remote_socket_read_data_beats_dram_7 + local_or_remote_socket_read_data_beats_dram_8 + local_or_remote_socket_read_data_beats_dram_9 + local_or_remote_socket_read_data_beats_dram_10 + local_or_remote_socket_read_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_local_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_write_data_beats_dram_0 + local_socket_write_data_beats_dram_1 + local_socket_write_data_beats_dram_2 + local_socket_write_data_beats_dram_3 + local_socket_write_data_beats_dram_4 + local_socket_write_data_beats_dram_5 + local_socket_write_data_beats_dram_6 + local_socket_write_data_beats_dram_7 + local_socket_write_data_beats_dram_8 + local_socket_write_data_beats_dram_9 + local_socket_write_data_beats_dram_10 + local_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_remote_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_write_data_beats_dram_0 + remote_socket_write_data_beats_dram_1 + remote_socket_write_data_beats_dram_2 + remote_socket_write_data_beats_dram_3 + remote_socket_write_data_beats_dram_4 + remote_socket_write_data_beats_dram_5 + remote_socket_write_data_beats_dram_6 + remote_socket_write_data_beats_dram_7 + remote_socket_write_data_beats_dram_8 + remote_socket_write_data_beats_dram_9 + remote_socket_write_data_beats_dram_10 + remote_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_local_or_remote_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in local or remote socket.", + "MetricExpr": "(local_or_remote_socket_write_data_beats_dram_0 + local_or_remote_socket_write_data_beats_dram_1 + local_or_remote_socket_write_data_beats_dram_2 + local_or_remote_socket_write_data_beats_dram_3 + local_or_remote_socket_write_data_beats_dram_4 + local_or_remote_socket_write_data_beats_dram_5 + local_or_remote_socket_write_data_beats_dram_6 + local_or_remote_socket_write_data_beats_dram_7 + local_or_remote_socket_write_data_beats_dram_8 + local_or_remote_socket_write_data_beats_dram_9 + local_or_remote_socket_write_data_beats_dram_10 + local_or_remote_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_read_bandwidth_for_local_socket", + "BriefDescription": "Upstream DMA read data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_upstream_read_data_beats_io_0 + local_socket_upstream_read_data_beats_io_1 + local_socket_upstream_read_data_beats_io_2 + local_socket_upstream_read_data_beats_io_3 + local_socket_upstream_read_data_beats_io_4 + local_socket_upstream_read_data_beats_io_5 + local_socket_upstream_read_data_beats_io_6 + local_socket_upstream_read_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_write_bandwidth_for_local_socket", + "BriefDescription": "Upstream DMA write data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_upstream_write_data_beats_io_0 + local_socket_upstream_write_data_beats_io_1 + local_socket_upstream_write_data_beats_io_2 + local_socket_upstream_write_data_beats_io_3 + local_socket_upstream_write_data_beats_io_4 + local_socket_upstream_write_data_beats_io_5 + local_socket_upstream_write_data_beats_io_6 + local_socket_upstream_write_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_read_bandwidth_for_remote_socket", + "BriefDescription": "Upstream DMA read data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_upstream_read_data_beats_io_0 + remote_socket_upstream_read_data_beats_io_1 + remote_socket_upstream_read_data_beats_io_2 + remote_socket_upstream_read_data_beats_io_3 + remote_socket_upstream_read_data_beats_io_4 + remote_socket_upstream_read_data_beats_io_5 + remote_socket_upstream_read_data_beats_io_6 + remote_socket_upstream_read_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_write_bandwidth_for_remote_socket", + "BriefDescription": "Upstream DMA write data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_upstream_write_data_beats_io_0 + remote_socket_upstream_write_data_beats_io_1 + remote_socket_upstream_write_data_beats_io_2 + remote_socket_upstream_write_data_beats_io_3 + remote_socket_upstream_write_data_beats_io_4 + remote_socket_upstream_write_data_beats_io_5 + remote_socket_upstream_write_data_beats_io_6 + remote_socket_upstream_write_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "core_inbound_data_bandwidth_for_local_socket", + "BriefDescription": "Core inbound data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_inbound_data_beats_cfi_0 + local_socket_inbound_data_beats_cfi_1 + local_socket_inbound_data_beats_cfi_2 + local_socket_inbound_data_beats_cfi_3 + local_socket_inbound_data_beats_cfi_4 + local_socket_inbound_data_beats_cfi_5 + local_socket_inbound_data_beats_cfi_6 + local_socket_inbound_data_beats_cfi_7 + local_socket_inbound_data_beats_cfi_8 + local_socket_inbound_data_beats_cfi_9 + local_socket_inbound_data_beats_cfi_10 + local_socket_inbound_data_beats_cfi_11 + local_socket_inbound_data_beats_cfi_12 + local_socket_inbound_data_beats_cfi_13 + local_socket_inbound_data_beats_cfi_14 + local_socket_inbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3.2e-5MB/s" + }, + { + "MetricName": "core_outbound_data_bandwidth_for_local_socket", + "BriefDescription": "Core outbound data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_outbound_data_beats_cfi_0 + local_socket_outbound_data_beats_cfi_1 + local_socket_outbound_data_beats_cfi_2 + local_socket_outbound_data_beats_cfi_3 + local_socket_outbound_data_beats_cfi_4 + local_socket_outbound_data_beats_cfi_5 + local_socket_outbound_data_beats_cfi_6 + local_socket_outbound_data_beats_cfi_7 + local_socket_outbound_data_beats_cfi_8 + local_socket_outbound_data_beats_cfi_9 + local_socket_outbound_data_beats_cfi_10 + local_socket_outbound_data_beats_cfi_11 + local_socket_outbound_data_beats_cfi_12 + local_socket_outbound_data_beats_cfi_13 + local_socket_outbound_data_beats_cfi_14 + local_socket_outbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "core_inbound_data_bandwidth_for_remote_socket", + "BriefDescription": "Core inbound data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_inbound_data_beats_cfi_0 + remote_socket_inbound_data_beats_cfi_1 + remote_socket_inbound_data_beats_cfi_2 + remote_socket_inbound_data_beats_cfi_3 + remote_socket_inbound_data_beats_cfi_4 + remote_socket_inbound_data_beats_cfi_5 + remote_socket_inbound_data_beats_cfi_6 + remote_socket_inbound_data_beats_cfi_7 + remote_socket_inbound_data_beats_cfi_8 + remote_socket_inbound_data_beats_cfi_9 + remote_socket_inbound_data_beats_cfi_10 + remote_socket_inbound_data_beats_cfi_11 + remote_socket_inbound_data_beats_cfi_12 + remote_socket_inbound_data_beats_cfi_13 + remote_socket_inbound_data_beats_cfi_14 + remote_socket_inbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3.2e-5MB/s" + }, + { + "MetricName": "core_outbound_data_bandwidth_for_remote_socket", + "BriefDescription": "Core outbound data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_outbound_data_beats_cfi_0 + remote_socket_outbound_data_beats_cfi_1 + remote_socket_outbound_data_beats_cfi_2 + remote_socket_outbound_data_beats_cfi_3 + remote_socket_outbound_data_beats_cfi_4 + remote_socket_outbound_data_beats_cfi_5 + remote_socket_outbound_data_beats_cfi_6 + remote_socket_outbound_data_beats_cfi_7 + remote_socket_outbound_data_beats_cfi_8 + remote_socket_outbound_data_beats_cfi_9 + remote_socket_outbound_data_beats_cfi_10 + remote_socket_outbound_data_beats_cfi_11 + remote_socket_outbound_data_beats_cfi_12 + remote_socket_outbound_data_beats_cfi_13 + remote_socket_outbound_data_beats_cfi_14 + remote_socket_outbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "cross_socket_inbound_data_bandwidth_for_local_socket", + "BriefDescription": "Inbound data bandwidth for accesses between local socket and remote socket.", + "MetricExpr": "(local_socket_inbound_data_beats_link_0 + local_socket_inbound_data_beats_link_1 + local_socket_inbound_data_beats_link_2 + local_socket_inbound_data_beats_link_3 + local_socket_inbound_data_beats_link_4 + local_socket_inbound_data_beats_link_5) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "cross_socket_outbound_data_bandwidth_for_local_socket", + "BriefDescription": "Outbound data bandwidth for accesses between local socket and remote socket.", + "MetricExpr": "(local_socket_outbound_data_beats_link_0 + local_socket_outbound_data_beats_link_1 + local_socket_outbound_data_beats_link_2 + local_socket_outbound_data_beats_link_3 + local_socket_outbound_data_beats_link_4 + local_socket_outbound_data_beats_link_5) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" } ] diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index c592079982fb..1c7a2cfa321f 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -19,72 +19,109 @@ struct pmu_table_entry { }; static const char *const big_c_string = -/* offset=0 */ "default_core\000" -/* offset=13 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" -/* offset=72 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" -/* offset=131 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" -/* offset=226 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" -/* offset=325 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" -/* offset=455 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" -/* offset=570 */ "hisi_sccl,ddrc\000" -/* offset=585 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" -/* offset=671 */ "uncore_cbox\000" -/* offset=683 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" -/* offset=914 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" -/* offset=979 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" -/* offset=1050 */ "hisi_sccl,l3c\000" -/* offset=1064 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" -/* offset=1144 */ "uncore_imc_free_running\000" -/* offset=1168 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" -/* offset=1263 */ "uncore_imc\000" -/* offset=1274 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" -/* offset=1352 */ "uncore_sys_ddr_pmu\000" -/* offset=1371 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" -/* offset=1444 */ "uncore_sys_ccn_pmu\000" -/* offset=1463 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" -/* offset=1537 */ "uncore_sys_cmn_pmu\000" -/* offset=1556 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" -/* offset=1696 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=1718 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=1781 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=1947 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=2011 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=2078 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=2149 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=2243 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* offset=2377 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=2441 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=2509 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=2579 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=2601 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=2623 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=2643 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=0 */ "tool\000" +/* offset=5 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000" +/* offset=78 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000" +/* offset=145 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000" +/* offset=210 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000" +/* offset=283 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000" +/* offset=425 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000" +/* offset=525 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000" +/* offset=639 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000" +/* offset=712 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000" +/* offset=795 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000" +/* offset=902 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000" +/* offset=1006 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000" +/* offset=1102 */ "default_core\000" +/* offset=1115 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" +/* offset=1174 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" +/* offset=1233 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" +/* offset=1328 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" +/* offset=1427 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" +/* offset=1557 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" +/* offset=1672 */ "hisi_sccl,ddrc\000" +/* offset=1687 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" +/* offset=1773 */ "uncore_cbox\000" +/* offset=1785 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" +/* offset=2016 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" +/* offset=2081 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" +/* offset=2152 */ "hisi_sccl,l3c\000" +/* offset=2166 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" +/* offset=2246 */ "uncore_imc_free_running\000" +/* offset=2270 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" +/* offset=2365 */ "uncore_imc\000" +/* offset=2376 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" +/* offset=2454 */ "uncore_sys_ddr_pmu\000" +/* offset=2473 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" +/* offset=2546 */ "uncore_sys_ccn_pmu\000" +/* offset=2565 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" +/* offset=2639 */ "uncore_sys_cmn_pmu\000" +/* offset=2658 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" +/* offset=2798 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" +/* offset=2820 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" +/* offset=2883 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" +/* offset=3049 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3113 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3180 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" +/* offset=3251 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" +/* offset=3345 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" +/* offset=3479 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" +/* offset=3543 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3611 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3681 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" +/* offset=3703 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" +/* offset=3725 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" +/* offset=3745 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" ; +static const struct compact_pmu_event pmu_events__common_tool[] = { +{ 5 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000 */ +{ 210 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000 */ +{ 283 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000 */ +{ 425 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000 */ +{ 525 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000 */ +{ 639 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000 */ +{ 712 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000 */ +{ 795 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000 */ +{ 902 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000 */ +{ 145 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000 */ +{ 1006 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000 */ +{ 78 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000 */ + +}; + +const struct pmu_table_entry pmu_events__common[] = { +{ + .entries = pmu_events__common_tool, + .num_entries = ARRAY_SIZE(pmu_events__common_tool), + .pmu_name = { 0 /* tool\000 */ }, +}, +}; + static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = { -{ 13 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ -{ 72 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ -{ 325 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ -{ 455 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ -{ 131 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ -{ 226 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ +{ 1115 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ +{ 1174 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ +{ 1427 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ +{ 1557 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ +{ 1233 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ +{ 1328 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = { -{ 585 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ +{ 1687 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = { -{ 1064 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ +{ 2166 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = { -{ 914 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ -{ 979 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ -{ 683 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ +{ 2016 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ +{ 2081 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ +{ 1785 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = { -{ 1274 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ +{ 2376 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = { -{ 1168 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ +{ 2270 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ }; @@ -92,51 +129,51 @@ const struct pmu_table_entry pmu_events__test_soc_cpu[] = { { .entries = pmu_events__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core), - .pmu_name = { 0 /* default_core\000 */ }, + .pmu_name = { 1102 /* default_core\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc), - .pmu_name = { 570 /* hisi_sccl,ddrc\000 */ }, + .pmu_name = { 1672 /* hisi_sccl,ddrc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c), - .pmu_name = { 1050 /* hisi_sccl,l3c\000 */ }, + .pmu_name = { 2152 /* hisi_sccl,l3c\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_cbox, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox), - .pmu_name = { 671 /* uncore_cbox\000 */ }, + .pmu_name = { 1773 /* uncore_cbox\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc), - .pmu_name = { 1263 /* uncore_imc\000 */ }, + .pmu_name = { 2365 /* uncore_imc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc_free_running, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running), - .pmu_name = { 1144 /* uncore_imc_free_running\000 */ }, + .pmu_name = { 2246 /* uncore_imc_free_running\000 */ }, }, }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 1696 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ -{ 2377 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 2149 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 2243 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 2441 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 2509 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 1781 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 1718 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 2643 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 2579 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 2601 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ -{ 2623 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 2078 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 1947 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 2011 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 2798 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ +{ 3479 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ +{ 3251 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ +{ 3345 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ +{ 3543 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 3611 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 2883 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ +{ 2820 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ +{ 3745 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ +{ 3681 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ +{ 3703 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ +{ 3725 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ +{ 3180 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ +{ 3049 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 3113 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ }; @@ -144,18 +181,18 @@ const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { { .entries = pmu_metrics__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core), - .pmu_name = { 0 /* default_core\000 */ }, + .pmu_name = { 1102 /* default_core\000 */ }, }, }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = { -{ 1463 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ +{ 2565 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = { -{ 1556 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ +{ 2658 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = { -{ 1371 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ +{ 2473 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ }; @@ -163,17 +200,17 @@ const struct pmu_table_entry pmu_events__test_soc_sys[] = { { .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu), - .pmu_name = { 1444 /* uncore_sys_ccn_pmu\000 */ }, + .pmu_name = { 2546 /* uncore_sys_ccn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu), - .pmu_name = { 1537 /* uncore_sys_cmn_pmu\000 */ }, + .pmu_name = { 2639 /* uncore_sys_cmn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu), - .pmu_name = { 1352 /* uncore_sys_ddr_pmu\000 */ }, + .pmu_name = { 2454 /* uncore_sys_ddr_pmu\000 */ }, }, }; @@ -211,6 +248,15 @@ struct pmu_events_map { */ const struct pmu_events_map pmu_events_map[] = { { + .arch = "common", + .cpuid = "common", + .event_table = { + .pmus = pmu_events__common, + .num_pmus = ARRAY_SIZE(pmu_events__common), + }, + .metric_table = {}, +}, +{ .arch = "testarch", .cpuid = "testcpu", .event_table = { @@ -380,7 +426,7 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table, continue; ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data); - if (pmu || ret) + if (ret) return ret; } return 0; @@ -457,11 +503,11 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, return 0; } -static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu) { static struct { const struct pmu_events_map *map; - struct perf_pmu *pmu; + struct perf_cpu cpu; } last_result; static struct { const struct pmu_events_map *map; @@ -472,10 +518,10 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) char *cpuid = NULL; size_t i; - if (has_last_result && last_result.pmu == pmu) + if (has_last_result && last_result.cpu.cpu == cpu.cpu) return last_result.map; - cpuid = perf_pmu__getcpuid(pmu); + cpuid = get_cpuid_allow_env_override(cpu); /* * On some platforms which uses cpus map, cpuid can be NULL for @@ -506,12 +552,21 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) has_last_map_search = true; } out_update_last_result: - last_result.pmu = pmu; + last_result.cpu = cpu; last_result.map = map; has_last_result = true; return map; } +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +{ + struct perf_cpu cpu = {-1}; + + if (pmu) + cpu = perf_cpu_map__min(pmu->cpus); + return map_for_cpu(cpu); +} + const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_map *map = map_for_pmu(pmu); @@ -532,24 +587,12 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return NULL; } -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *pmu_metrics_table__find(void) { - const struct pmu_events_map *map = map_for_pmu(pmu); - - if (!map) - return NULL; + struct perf_cpu cpu = {-1}; + const struct pmu_events_map *map = map_for_cpu(cpu); - if (!pmu) - return &map->metric_table; - - for (size_t i = 0; i < map->metric_table.num_pmus; i++) { - const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i]; - const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset]; - - if (pmu__name_match(pmu, pmu_name)) - return &map->metric_table; - } - return NULL; + return map ? &map->metric_table : NULL; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index bb0a5d92df4a..d781a377757a 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -292,6 +292,7 @@ class JsonEvent: 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', 'arm_cmn': 'arm_cmn', + 'tool': 'tool', } return table[unit] if unit in table else f'uncore_{unit.lower()}' @@ -722,6 +723,17 @@ const struct pmu_events_map pmu_events_map[] = { \t} }, """) + elif arch == 'common': + _args.output_file.write("""{ +\t.arch = "common", +\t.cpuid = "common", +\t.event_table = { +\t\t.pmus = pmu_events__common, +\t\t.num_pmus = ARRAY_SIZE(pmu_events__common), +\t}, +\t.metric_table = {}, +}, +""") else: with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile: table = csv.reader(csvfile) @@ -930,7 +942,7 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table, continue; ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data); - if (pmu || ret) + if (ret) return ret; } return 0; @@ -1007,11 +1019,11 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, return 0; } -static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu) { static struct { const struct pmu_events_map *map; - struct perf_pmu *pmu; + struct perf_cpu cpu; } last_result; static struct { const struct pmu_events_map *map; @@ -1022,10 +1034,10 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) char *cpuid = NULL; size_t i; - if (has_last_result && last_result.pmu == pmu) + if (has_last_result && last_result.cpu.cpu == cpu.cpu) return last_result.map; - cpuid = perf_pmu__getcpuid(pmu); + cpuid = get_cpuid_allow_env_override(cpu); /* * On some platforms which uses cpus map, cpuid can be NULL for @@ -1056,12 +1068,21 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) has_last_map_search = true; } out_update_last_result: - last_result.pmu = pmu; + last_result.cpu = cpu; last_result.map = map; has_last_result = true; return map; } +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +{ + struct perf_cpu cpu = {-1}; + + if (pmu) + cpu = perf_cpu_map__min(pmu->cpus); + return map_for_cpu(cpu); +} + const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_map *map = map_for_pmu(pmu); @@ -1082,24 +1103,12 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return NULL; } -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *pmu_metrics_table__find(void) { - const struct pmu_events_map *map = map_for_pmu(pmu); - - if (!map) - return NULL; - - if (!pmu) - return &map->metric_table; + struct perf_cpu cpu = {-1}; + const struct pmu_events_map *map = map_for_cpu(cpu); - for (size_t i = 0; i < map->metric_table.num_pmus; i++) { - const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i]; - const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset]; - - if (pmu__name_match(pmu, pmu_name)) - return &map->metric_table; - } - return NULL; + return map ? &map->metric_table : NULL; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) @@ -1241,7 +1250,7 @@ def main() -> None: if len(parents) == _args.model.split(',')[0].count('/'): # We're testing the correct directory. item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name - if 'test' not in item_path and item_path not in _args.model.split(','): + if 'test' not in item_path and 'common' not in item_path and item_path not in _args.model.split(','): continue action(parents, item) if item.is_dir(): @@ -1289,7 +1298,7 @@ struct pmu_table_entry { for item in os.scandir(_args.starting_dir): if not item.is_dir(): continue - if item.name == _args.arch or _args.arch == 'all' or item.name == 'test': + if item.name == _args.arch or _args.arch == 'all' or item.name == 'test' or item.name == 'common': archs.append(item.name) if len(archs) < 2 and _args.arch != 'none': diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 5435ad92180c..675562e6f770 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -103,7 +103,7 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pm void *data); const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu); -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu); +const struct pmu_metrics_table *pmu_metrics_table__find(void); const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid); const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid); int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data); diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 3954bd1587ce..01f54d6724a5 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -12,6 +12,7 @@ #define PY_SSIZE_T_CLEAN #include <Python.h> +#include "../../../util/config.h" #include "../../../util/trace-event.h" #include "../../../util/event.h" #include "../../../util/symbol.h" @@ -182,6 +183,15 @@ static PyObject *perf_sample_srccode(PyObject *obj, PyObject *args) return perf_sample_src(obj, args, true); } +static PyObject *__perf_config_get(PyObject *obj, PyObject *args) +{ + const char *config_name; + + if (!PyArg_ParseTuple(args, "s", &config_name)) + return NULL; + return Py_BuildValue("s", perf_config_get(config_name)); +} + static PyMethodDef ContextMethods[] = { #ifdef HAVE_LIBTRACEEVENT { "common_pc", perf_trace_context_common_pc, METH_VARARGS, @@ -199,6 +209,7 @@ static PyMethodDef ContextMethods[] = { METH_VARARGS, "Get source file name and line number."}, { "perf_sample_srccode", perf_sample_srccode, METH_VARARGS, "Get source file name, line number and line."}, + { "perf_config_get", __perf_config_get, METH_VARARGS, "Get perf config entry"}, { NULL, NULL, 0, NULL} }; diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 7aff02d84ffb..ba208c90d631 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -11,36 +11,74 @@ import os from os import path import re from subprocess import * -from optparse import OptionParser, make_option +import argparse +import platform -from perf_trace_context import perf_set_itrace_options, \ - perf_sample_insn, perf_sample_srccode +from perf_trace_context import perf_sample_srccode, perf_config_get # Below are some example commands for using this script. +# Note a --kcore recording is required for accurate decode +# due to the alternatives patching mechanism. However this +# script only supports reading vmlinux for disassembly dump, +# meaning that any patched instructions will appear +# as unpatched, but the instruction ranges themselves will +# be correct. In addition to this, source line info comes +# from Perf, and when using kcore there is no debug info. The +# following lists the supported features in each mode: +# +# +-----------+-----------------+------------------+------------------+ +# | Recording | Accurate decode | Source line dump | Disassembly dump | +# +-----------+-----------------+------------------+------------------+ +# | --kcore | yes | no | yes | +# | normal | no | yes | yes | +# +-----------+-----------------+------------------+------------------+ +# +# Output disassembly with objdump and auto detect vmlinux +# (when running on same machine.) +# perf script -s scripts/python/arm-cs-trace-disasm.py -d # -# Output disassembly with objdump: -# perf script -s scripts/python/arm-cs-trace-disasm.py \ -# -- -d objdump -k path/to/vmlinux # Output disassembly with llvm-objdump: # perf script -s scripts/python/arm-cs-trace-disasm.py \ # -- -d llvm-objdump-11 -k path/to/vmlinux +# # Output only source line and symbols: # perf script -s scripts/python/arm-cs-trace-disasm.py -# Command line parsing. -option_list = [ - # formatting options for the bottom entry of the stack - make_option("-k", "--vmlinux", dest="vmlinux_name", - help="Set path to vmlinux file"), - make_option("-d", "--objdump", dest="objdump_name", - help="Set path to objdump executable file"), - make_option("-v", "--verbose", dest="verbose", - action="store_true", default=False, - help="Enable debugging log") -] +def default_objdump(): + config = perf_config_get("annotate.objdump") + return config if config else "objdump" -parser = OptionParser(option_list=option_list) -(options, args) = parser.parse_args() +# Command line parsing. +def int_arg(v): + v = int(v) + if v < 0: + raise argparse.ArgumentTypeError("Argument must be a positive integer") + return v + +args = argparse.ArgumentParser() +args.add_argument("-k", "--vmlinux", + help="Set path to vmlinux file. Omit to autodetect if running on same machine") +args.add_argument("-d", "--objdump", nargs="?", const=default_objdump(), + help="Show disassembly. Can also be used to change the objdump path"), +args.add_argument("-v", "--verbose", action="store_true", help="Enable debugging log") +args.add_argument("--start-time", type=int_arg, help="Monotonic clock time of sample to start from. " + "See 'time' field on samples in -v mode.") +args.add_argument("--stop-time", type=int_arg, help="Monotonic clock time of sample to stop at. " + "See 'time' field on samples in -v mode.") +args.add_argument("--start-sample", type=int_arg, help="Index of sample to start from. " + "See 'index' field on samples in -v mode.") +args.add_argument("--stop-sample", type=int_arg, help="Index of sample to stop at. " + "See 'index' field on samples in -v mode.") + +options = args.parse_args() +if (options.start_time and options.stop_time and + options.start_time >= options.stop_time): + print("--start-time must less than --stop-time") + exit(2) +if (options.start_sample and options.stop_sample and + options.start_sample >= options.stop_sample): + print("--start-sample must less than --stop-sample") + exit(2) # Initialize global dicts and regular expression disasm_cache = dict() @@ -48,11 +86,23 @@ cpu_data = dict() disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") cache_size = 64*1024 +sample_idx = -1 glb_source_file_name = None glb_line_number = None glb_dso = None +kver = platform.release() +vmlinux_paths = [ + f"/usr/lib/debug/boot/vmlinux-{kver}.debug", + f"/usr/lib/debug/lib/modules/{kver}/vmlinux", + f"/lib/modules/{kver}/build/vmlinux", + f"/usr/lib/debug/boot/vmlinux-{kver}", + f"/boot/vmlinux-{kver}", + f"/boot/vmlinux", + f"vmlinux" +] + def get_optional(perf_dict, field): if field in perf_dict: return perf_dict[field] @@ -63,12 +113,25 @@ def get_offset(perf_dict, field): return "+%#x" % perf_dict[field] return "" +def find_vmlinux(): + if hasattr(find_vmlinux, "path"): + return find_vmlinux.path + + for v in vmlinux_paths: + if os.access(v, os.R_OK): + find_vmlinux.path = v + break + else: + find_vmlinux.path = None + + return find_vmlinux.path + def get_dso_file_path(dso_name, dso_build_id): if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): - if (options.vmlinux_name): - return options.vmlinux_name; + if (options.vmlinux): + return options.vmlinux; else: - return dso_name + return find_vmlinux() if find_vmlinux() else dso_name if (dso_name == "[vdso]") : append = "/vdso" @@ -92,7 +155,7 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr): else: start_addr = start_addr - dso_start; stop_addr = stop_addr - dso_start; - disasm = [ options.objdump_name, "-d", "-z", + disasm = [ options.objdump, "-d", "-z", "--start-address="+format(start_addr,"#x"), "--stop-address="+format(stop_addr,"#x") ] disasm += [ dso_fname ] @@ -112,10 +175,10 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr): def print_sample(sample): print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \ - "pid: %d tid: %d period: %d time: %d }" % \ + "pid: %d tid: %d period: %d time: %d index: %d}" % \ (sample['cpu'], sample['addr'], sample['phys_addr'], \ sample['ip'], sample['pid'], sample['tid'], \ - sample['period'], sample['time'])) + sample['period'], sample['time'], sample_idx)) def trace_begin(): print('ARM CoreSight Trace Data Assembler Dump') @@ -177,6 +240,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso): def process_event(param_dict): global cache_size global options + global sample_idx sample = param_dict["sample"] comm = param_dict["comm"] @@ -187,11 +251,26 @@ def process_event(param_dict): dso_start = get_optional(param_dict, "dso_map_start") dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + map_pgoff = get_optional(param_dict, "map_pgoff") + # check for valid map offset + if (str(map_pgoff) == '[unknown]'): + map_pgoff = 0 cpu = sample["cpu"] ip = sample["ip"] addr = sample["addr"] + sample_idx += 1 + + if (options.start_time and sample["time"] < options.start_time): + return + if (options.stop_time and sample["time"] > options.stop_time): + exit(0) + if (options.start_sample and sample_idx < options.start_sample): + return + if (options.stop_sample and sample_idx > options.stop_sample): + exit(0) + if (options.verbose == True): print("Event type: %s" % name) print_sample(sample) @@ -243,9 +322,10 @@ def process_event(param_dict): # Record for previous sample packet cpu_data[str(cpu) + 'addr'] = addr - # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 - if (start_addr == 0 and stop_addr == 4): - print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) + # Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet + if (start_addr == 0): + if ((stop_addr == 4) and (options.verbose == True)): + print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) return if (start_addr < int(dso_start) or start_addr > int(dso_end)): @@ -256,19 +336,20 @@ def process_event(param_dict): print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) return - if (options.objdump_name != None): + if (options.objdump != None): # It doesn't need to decrease virtual memory offset for disassembly # for kernel dso and executable file dso, so in this case we set # vm_start to zero. if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 + map_pgoff = 0 else: dso_vm_start = int(dso_start) dso_fname = get_dso_file_path(dso, dso_bid) if path.exists(dso_fname): - print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) else: - print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr)) + print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff)) print_srccode(comm, param_dict, sample, symbol, dso) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 5671ee530019..ec4e1f034742 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -4,7 +4,6 @@ perf-test-y += builtin-test.o perf-test-y += tests-scripts.o perf-test-y += parse-events.o perf-test-y += dso-data.o -perf-test-y += attr.o perf-test-y += vmlinux-kallsyms.o perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o @@ -67,12 +66,13 @@ perf-test-y += sigtrap.o perf-test-y += event_groups.o perf-test-y += symbols.o perf-test-y += util.o +perf-test-y += hwmon_pmu.o +perf-test-y += tool_pmu.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif -CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c deleted file mode 100644 index 97e1bdd6ec0e..000000000000 --- a/tools/perf/tests/attr.c +++ /dev/null @@ -1,218 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * The struct perf_event_attr test support. - * - * This test is embedded inside into perf directly and is governed - * by the PERF_TEST_ATTR environment variable and hook inside - * sys_perf_event_open function. - * - * The general idea is to store 'struct perf_event_attr' details for - * each event created within single perf command. Each event details - * are stored into separate text file. Once perf command is finished - * these files can be checked for values we expect for command. - * - * Besides 'struct perf_event_attr' values we also store 'fd' and - * 'group_fd' values to allow checking for groups created. - * - * This all is triggered by setting PERF_TEST_ATTR environment variable. - * It must contain name of existing directory with access and write - * permissions. All the event text files are stored there. - */ - -#include <debug.h> -#include <errno.h> -#include <inttypes.h> -#include <stdlib.h> -#include <stdio.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <subcmd/exec-cmd.h> -#include "event.h" -#include "util.h" -#include "tests.h" -#include "pmus.h" - -#define ENV "PERF_TEST_ATTR" - -static char *dir; -static bool ready; - -void test_attr__init(void) -{ - dir = getenv(ENV); - test_attr__enabled = (dir != NULL); -} - -#define BUFSIZE 1024 - -#define __WRITE_ASS(str, fmt, data) \ -do { \ - char buf[BUFSIZE]; \ - size_t size; \ - \ - size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data); \ - if (1 != fwrite(buf, size, 1, file)) { \ - perror("test attr - failed to write event file"); \ - fclose(file); \ - return -1; \ - } \ - \ -} while (0) - -#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) - -static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ - FILE *file; - char path[PATH_MAX]; - - if (!ready) - return 0; - - snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, - attr->type, attr->config, fd); - - file = fopen(path, "w+"); - if (!file) { - perror("test attr - failed to open event file"); - return -1; - } - - if (fprintf(file, "[event-%d-%llu-%d]\n", - attr->type, attr->config, fd) < 0) { - perror("test attr - failed to write event file"); - fclose(file); - return -1; - } - - /* syscall arguments */ - __WRITE_ASS(fd, "d", fd); - __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu.cpu); - __WRITE_ASS(pid, "d", pid); - __WRITE_ASS(flags, "lu", flags); - - /* struct perf_event_attr */ - WRITE_ASS(type, PRIu32); - WRITE_ASS(size, PRIu32); - WRITE_ASS(config, "llu"); - WRITE_ASS(sample_period, "llu"); - WRITE_ASS(sample_type, "llu"); - WRITE_ASS(read_format, "llu"); - WRITE_ASS(disabled, "d"); - WRITE_ASS(inherit, "d"); - WRITE_ASS(pinned, "d"); - WRITE_ASS(exclusive, "d"); - WRITE_ASS(exclude_user, "d"); - WRITE_ASS(exclude_kernel, "d"); - WRITE_ASS(exclude_hv, "d"); - WRITE_ASS(exclude_idle, "d"); - WRITE_ASS(mmap, "d"); - WRITE_ASS(comm, "d"); - WRITE_ASS(freq, "d"); - WRITE_ASS(inherit_stat, "d"); - WRITE_ASS(enable_on_exec, "d"); - WRITE_ASS(task, "d"); - WRITE_ASS(watermark, "d"); - WRITE_ASS(precise_ip, "d"); - WRITE_ASS(mmap_data, "d"); - WRITE_ASS(sample_id_all, "d"); - WRITE_ASS(exclude_host, "d"); - WRITE_ASS(exclude_guest, "d"); - WRITE_ASS(exclude_callchain_kernel, "d"); - WRITE_ASS(exclude_callchain_user, "d"); - WRITE_ASS(mmap2, "d"); - WRITE_ASS(comm_exec, "d"); - WRITE_ASS(context_switch, "d"); - WRITE_ASS(write_backward, "d"); - WRITE_ASS(namespaces, "d"); - WRITE_ASS(use_clockid, "d"); - WRITE_ASS(wakeup_events, PRIu32); - WRITE_ASS(bp_type, PRIu32); - WRITE_ASS(config1, "llu"); - WRITE_ASS(config2, "llu"); - WRITE_ASS(branch_sample_type, "llu"); - WRITE_ASS(sample_regs_user, "llu"); - WRITE_ASS(sample_stack_user, PRIu32); - - fclose(file); - return 0; -} - -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ - int errno_saved = errno; - - if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { - pr_err("test attr FAILED"); - exit(128); - } - - errno = errno_saved; -} - -void test_attr__ready(void) -{ - if (unlikely(test_attr__enabled) && !ready) - ready = true; -} - -static int run_dir(const char *d, const char *perf) -{ - char v[] = "-vvvvv"; - int vcnt = min(verbose, (int) sizeof(v) - 1); - char cmd[3*PATH_MAX]; - - if (verbose > 0) - vcnt++; - - scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", - d, d, perf, vcnt, v); - - return system(cmd) ? TEST_FAIL : TEST_OK; -} - -static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct stat st; - char path_perf[PATH_MAX]; - char path_dir[PATH_MAX]; - char *exec_path; - - if (perf_pmus__num_core_pmus() > 1) { - /* - * TODO: Attribute tests hard code the PMU type. If there are >1 - * core PMU then each PMU will have a different type which - * requires additional support. - */ - pr_debug("Skip test on hybrid systems"); - return TEST_SKIP; - } - - /* First try development tree tests. */ - if (!lstat("./tests", &st)) - return run_dir("./tests", "./perf"); - - exec_path = get_argv_exec_path(); - if (exec_path == NULL) - return -1; - - /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", exec_path); - snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); - free(exec_path); - - if (!lstat(path_dir, &st) && - !lstat(path_perf, &st)) - return run_dir(path_dir, path_perf); - - return TEST_SKIP; -} - -DEFINE_SUITE("Setup struct perf_event_attr", attr); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 470a9709427d..4751dd3c6f67 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -8,6 +8,7 @@ #include <errno.h> #include <poll.h> #include <unistd.h> +#include <setjmp.h> #include <string.h> #include <stdlib.h> #include <sys/types.h> @@ -39,11 +40,8 @@ * making them easier to debug. */ static bool dont_fork; -/* Don't fork the tests in parallel and wait for their completion. */ -static bool sequential = true; -/* Do it in parallel, lacks infrastructure to avoid running tests that clash for resources, - * So leave it as the developers choice to enable while working on the needed infra */ -static bool parallel; +/* Fork the tests in parallel and wait for their completion. */ +static bool sequential; const char *dso_to_test; const char *test_objdump_path = "objdump"; @@ -73,13 +71,14 @@ static struct test_suite *generic_tests[] = { &suite__PERF_RECORD, &suite__pmu, &suite__pmu_events, + &suite__hwmon_pmu, + &suite__tool_pmu, &suite__dso_data, &suite__perf_evsel__roundtrip_name_test, #ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, &suite__syscall_openat_tp_fields, #endif - &suite__attr, &suite__hists_link, &suite__python_use, &suite__bp_signal, @@ -139,12 +138,6 @@ static struct test_suite *generic_tests[] = { NULL, }; -static struct test_suite **tests[] = { - generic_tests, - arch_tests, - NULL, /* shell tests created at runtime. */ -}; - static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, @@ -155,6 +148,9 @@ static struct test_workload *workloads[] = { &workload__landlock, }; +#define workloads__for_each(workload) \ + for (unsigned i = 0; i < ARRAY_SIZE(workloads) && ({ workload = workloads[i]; 1; }); i++) + static int num_subtests(const struct test_suite *t) { int num; @@ -198,6 +194,14 @@ static test_fnptr test_function(const struct test_suite *t, int subtest) return t->test_cases[subtest].run_case; } +static bool test_exclusive(const struct test_suite *t, int subtest) +{ + if (subtest <= 0) + return t->test_cases[0].exclusive; + + return t->test_cases[subtest].exclusive; +} + static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) { int i; @@ -229,20 +233,47 @@ struct child_test { int subtest; }; +static jmp_buf run_test_jmp_buf; + +static void child_test_sig_handler(int sig) +{ + siglongjmp(run_test_jmp_buf, sig); +} + static int run_test_child(struct child_process *process) { + const int signals[] = { + SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGINT, SIGPIPE, SIGQUIT, SIGSEGV, SIGTERM, + }; struct child_test *child = container_of(process, struct child_test, process); int err; + err = sigsetjmp(run_test_jmp_buf, 1); + if (err) { + fprintf(stderr, "\n---- unexpected signal (%d) ----\n", err); + err = err > 0 ? -err : -1; + goto err_out; + } + + for (size_t i = 0; i < ARRAY_SIZE(signals); i++) + signal(signals[i], child_test_sig_handler); + pr_debug("--- start ---\n"); pr_debug("test child forked, pid %d\n", getpid()); err = test_function(child->test, child->subtest)(child->test, child->subtest); pr_debug("---- end(%d) ----\n", err); + +err_out: fflush(NULL); + for (size_t i = 0; i < ARRAY_SIZE(signals); i++) + signal(signals[i], SIG_DFL); return -err; } -static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width) +#define TEST_RUNNING -3 + +static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width, + int running) { if (has_subtests(t)) { int subw = width > 2 ? width - 2 : width; @@ -252,6 +283,9 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest)); switch (result) { + case TEST_RUNNING: + color_fprintf(stderr, PERF_COLOR_YELLOW, " Running (%d active)\n", running); + break; case TEST_OK: pr_info(" Ok\n"); break; @@ -273,16 +307,25 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul return 0; } -static int finish_test(struct child_test *child_test, int width) +static void finish_test(struct child_test **child_tests, int running_test, int child_test_num, + int width) { - struct test_suite *t = child_test->test; - int i = child_test->test_num; - int subi = child_test->subtest; - int err = child_test->process.err; - bool err_done = err <= 0; + struct child_test *child_test = child_tests[running_test]; + struct test_suite *t; + int i, subi, err; + bool err_done = false; struct strbuf err_output = STRBUF_INIT; + int last_running = -1; int ret; + if (child_test == NULL) { + /* Test wasn't started. */ + return; + } + t = child_test->test; + i = child_test->test_num; + subi = child_test->subtest; + err = child_test->process.err; /* * For test suites with subtests, display the suite name ahead of the * sub test names. @@ -294,7 +337,7 @@ static int finish_test(struct child_test *child_test, int width) * Busy loop reading from the child's stdout/stderr that are set to be * non-blocking until EOF. */ - if (!err_done) + if (err > 0) fcntl(err, F_SETFL, O_NONBLOCK); if (verbose > 1) { if (has_subtests(t)) @@ -308,57 +351,90 @@ static int finish_test(struct child_test *child_test, int width) .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, }, }; - char buf[512]; - ssize_t len; + if (perf_use_color_default) { + int running = 0; - /* Poll to avoid excessive spinning, timeout set for 100ms. */ - poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100); - if (!err_done && pfds[0].revents) { - errno = 0; - len = read(err, buf, sizeof(buf) - 1); + for (int y = running_test; y < child_test_num; y++) { + if (child_tests[y] == NULL) + continue; + if (check_if_command_finished(&child_tests[y]->process) == 0) + running++; + } + if (running != last_running) { + if (last_running != -1) { + /* + * Erase "Running (.. active)" line + * printed before poll/sleep. + */ + fprintf(debug_file(), PERF_COLOR_DELETE_LINE); + } + print_test_result(t, i, subi, TEST_RUNNING, width, running); + last_running = running; + } + } - if (len <= 0) { - err_done = errno != EAGAIN; - } else { - buf[len] = '\0'; - if (verbose > 1) - fprintf(stdout, "%s", buf); - else + err_done = true; + if (err <= 0) { + /* No child stderr to poll, sleep for 10ms for child to complete. */ + usleep(10 * 1000); + } else { + /* Poll to avoid excessive spinning, timeout set for 100ms. */ + poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100); + if (pfds[0].revents) { + char buf[512]; + ssize_t len; + + len = read(err, buf, sizeof(buf) - 1); + + if (len > 0) { + err_done = false; + buf[len] = '\0'; strbuf_addstr(&err_output, buf); + } } } + if (err_done) + err_done = check_if_command_finished(&child_test->process); + } + if (perf_use_color_default && last_running != -1) { + /* Erase "Running (.. active)" line printed before poll/sleep. */ + fprintf(debug_file(), PERF_COLOR_DELETE_LINE); } /* Clean up child process. */ ret = finish_command(&child_test->process); - if (verbose == 1 && ret == TEST_FAIL) { - /* Add header for test that was skipped above. */ - if (has_subtests(t)) - pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); - else - pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); + if (verbose > 1 || (verbose == 1 && ret == TEST_FAIL)) fprintf(stderr, "%s", err_output.buf); - } + strbuf_release(&err_output); - print_test_result(t, i, subi, ret, width); + print_test_result(t, i, subi, ret, width, /*running=*/0); if (err > 0) close(err); - return 0; + zfree(&child_tests[running_test]); } static int start_test(struct test_suite *test, int i, int subi, struct child_test **child, - int width) + int width, int pass) { int err; *child = NULL; if (dont_fork) { - pr_debug("--- start ---\n"); - err = test_function(test, subi)(test, subi); - pr_debug("---- end ----\n"); - print_test_result(test, i, subi, err, width); + if (pass == 1) { + pr_debug("--- start ---\n"); + err = test_function(test, subi)(test, subi); + pr_debug("---- end ----\n"); + print_test_result(test, i, subi, err, width, /*running=*/0); + } + return 0; + } + if (pass == 1 && !sequential && test_exclusive(test, subi)) { + /* When parallel, skip exclusive tests on the first pass. */ + return 0; + } + if (pass != 1 && (sequential || !test_exclusive(test, subi))) { + /* Sequential and non-exclusive tests were run on the first pass. */ return 0; } - *child = zalloc(sizeof(**child)); if (!*child) return -ENOMEM; @@ -377,35 +453,42 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes (*child)->process.err = -1; } (*child)->process.no_exec_cmd = run_test_child; - err = start_command(&(*child)->process); - if (err || !sequential) - return err; - return finish_test(*child, width); + if (sequential || pass == 2) { + err = start_command(&(*child)->process); + if (err) + return err; + finish_test(child, /*running_test=*/0, /*child_test_num=*/1, width); + return 0; + } + return start_command(&(*child)->process); } -#define for_each_test(j, k, t) \ - for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0) \ - while ((t = tests[j][k++]) != NULL) +/* State outside of __cmd_test for the sake of the signal handler. */ + +static size_t num_tests; +static struct child_test **child_tests; +static jmp_buf cmd_test_jmp_buf; -static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) +static void cmd_test_sig_handler(int sig) { - struct test_suite *t; - unsigned int j, k; - int i = 0; - int width = 0; - size_t num_tests = 0; - struct child_test **child_tests; - int child_test_num = 0; + siglongjmp(cmd_test_jmp_buf, sig); +} + +static int __cmd_test(struct test_suite **suites, int argc, const char *argv[], + struct intlist *skiplist) +{ + static int width = 0; + int err = 0; - for_each_test(j, k, t) { - int len = strlen(test_description(t, -1)); + for (struct test_suite **t = suites; *t; t++) { + int len = strlen(test_description(*t, -1)); if (width < len) width = len; - if (has_subtests(t)) { - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - len = strlen(test_description(t, subi)); + if (has_subtests(*t)) { + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + len = strlen(test_description(*t, subi)); if (width < len) width = len; num_tests++; @@ -418,97 +501,137 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) if (!child_tests) return -ENOMEM; - for_each_test(j, k, t) { - int curr = i++; - - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) { - bool skip = true; + err = sigsetjmp(cmd_test_jmp_buf, 1); + if (err) { + pr_err("\nSignal (%d) while running tests.\nTerminating tests with the same signal\n", + err); + for (size_t x = 0; x < num_tests; x++) { + struct child_test *child_test = child_tests[x]; - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - if (perf_test__matches(test_description(t, subi), - curr, argc, argv)) - skip = false; - } - - if (skip) + if (!child_test || child_test->process.pid <= 0) continue; - } - if (intlist__find(skiplist, i)) { - pr_info("%3d: %-*s:", curr + 1, width, test_description(t, -1)); - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); - continue; + pr_debug3("Killing %d pid %d\n", + child_test->test_num + 1, + child_test->process.pid); + kill(child_test->process.pid, err); } + goto err_out; + } + signal(SIGINT, cmd_test_sig_handler); + signal(SIGTERM, cmd_test_sig_handler); - if (!has_subtests(t)) { - int err = start_test(t, curr, -1, &child_tests[child_test_num++], width); + /* + * In parallel mode pass 1 runs non-exclusive tests in parallel, pass 2 + * runs the exclusive tests sequentially. In other modes all tests are + * run in pass 1. + */ + for (int pass = 1; pass <= 2; pass++) { + int child_test_num = 0; + int i = 0; + + for (struct test_suite **t = suites; *t; t++) { + int curr = i++; + + if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) { + /* + * Test suite shouldn't be run based on + * description. See if subtest should. + */ + bool skip = true; + + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + if (perf_test__matches(test_description(*t, subi), + curr, argc, argv)) + skip = false; + } + + if (skip) + continue; + } - if (err) { - /* TODO: if !sequential waitpid the already forked children. */ - free(child_tests); - return err; + if (intlist__find(skiplist, i)) { + pr_info("%3d: %-*s:", curr + 1, width, test_description(*t, -1)); + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); + continue; } - } else { - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - int err; - if (!perf_test__matches(test_description(t, subi), + if (!has_subtests(*t)) { + err = start_test(*t, curr, -1, &child_tests[child_test_num++], + width, pass); + if (err) + goto err_out; + continue; + } + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + if (!perf_test__matches(test_description(*t, subi), curr, argc, argv)) continue; - err = start_test(t, curr, subi, &child_tests[child_test_num++], - width); + err = start_test(*t, curr, subi, &child_tests[child_test_num++], + width, pass); if (err) - return err; + goto err_out; } } - } - for (i = 0; i < child_test_num; i++) { if (!sequential) { - int ret = finish_test(child_tests[i], width); - - if (ret) - return ret; + /* Parallel mode starts tests but doesn't finish them. Do that now. */ + for (size_t x = 0; x < num_tests; x++) + finish_test(child_tests, x, num_tests, width); } - free(child_tests[i]); + } +err_out: + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + if (err) { + pr_err("Internal test harness failure. Completing any started tests:\n:"); + for (size_t x = 0; x < num_tests; x++) + finish_test(child_tests, x, num_tests, width); } free(child_tests); - return 0; + return err; } -static int perf_test__list(int argc, const char **argv) +static int perf_test__list(struct test_suite **suites, int argc, const char **argv) { - unsigned int j, k; - struct test_suite *t; int i = 0; - for_each_test(j, k, t) { + for (struct test_suite **t = suites; *t; t++) { int curr = i++; - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) + if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) continue; - pr_info("%3d: %s\n", i, test_description(t, -1)); + pr_info("%3d: %s\n", i, test_description(*t, -1)); - if (has_subtests(t)) { - int subn = num_subtests(t); + if (has_subtests(*t)) { + int subn = num_subtests(*t); int subi; for (subi = 0; subi < subn; subi++) pr_info("%3d:%1d: %s\n", i, subi + 1, - test_description(t, subi)); + test_description(*t, subi)); } } return 0; } +static int workloads__fprintf_list(FILE *fp) +{ + struct test_workload *twl; + int printed = 0; + + workloads__for_each(twl) + printed += fprintf(fp, "%s\n", twl->name); + + return printed; +} + static int run_workload(const char *work, int argc, const char **argv) { - unsigned int i = 0; struct test_workload *twl; - for (i = 0; i < ARRAY_SIZE(workloads); i++) { - twl = workloads[i]; + workloads__for_each(twl) { if (!strcmp(twl->name, work)) return twl->func(argc, argv); } @@ -526,6 +649,55 @@ static int perf_test__config(const char *var, const char *value, return 0; } +static struct test_suite **build_suites(void) +{ + /* + * TODO: suites is static to avoid needing to clean up the scripts tests + * for leak sanitizer. + */ + static struct test_suite **suites[] = { + generic_tests, + arch_tests, + NULL, + }; + struct test_suite **result; + struct test_suite *t; + size_t n = 0, num_suites = 0; + + if (suites[2] == NULL) + suites[2] = create_script_test_suites(); + +#define for_each_test(t) \ + for (size_t i = 0, j = 0; i < ARRAY_SIZE(suites); i++, j = 0) \ + while ((t = suites[i][j++]) != NULL) + + for_each_test(t) + num_suites++; + + result = calloc(num_suites + 1, sizeof(struct test_suite *)); + + for (int pass = 1; pass <= 2; pass++) { + for_each_test(t) { + bool exclusive = false; + + if (!has_subtests(t)) { + exclusive = test_exclusive(t, -1); + } else { + for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { + if (test_exclusive(t, subi)) { + exclusive = true; + break; + } + } + } + if ((!exclusive && pass == 1) || (exclusive && pass == 2)) + result[n++] = t; + } + } + return result; +#undef for_each_test +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -534,16 +706,17 @@ int cmd_test(int argc, const char **argv) }; const char *skip = NULL; const char *workload = NULL; + bool list_workloads = false; const struct option test_options[] = { OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('F', "dont-fork", &dont_fork, "Do not fork for testcase"), - OPT_BOOLEAN('p', "parallel", ¶llel, "Run the tests in parallel"), OPT_BOOLEAN('S', "sequential", &sequential, "Run the tests one after another rather than in parallel"), - OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), + OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."), + OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"), OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), OPT_STRING(0, "objdump", &test_objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -552,6 +725,7 @@ int cmd_test(int argc, const char **argv) const char * const test_subcommands[] = { "list", NULL }; struct intlist *skiplist = NULL; int ret = hists__init(); + struct test_suite **suites; if (ret < 0) return ret; @@ -561,22 +735,29 @@ int cmd_test(int argc, const char **argv) /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); - tests[2] = create_script_test_suites(); argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); - if (argc >= 1 && !strcmp(argv[0], "list")) - return perf_test__list(argc - 1, argv + 1); + if (argc >= 1 && !strcmp(argv[0], "list")) { + suites = build_suites(); + ret = perf_test__list(suites, argc - 1, argv + 1); + free(suites); + return ret; + } if (workload) return run_workload(workload, argc, argv); + if (list_workloads) { + workloads__fprintf_list(stdout); + return 0; + } + if (dont_fork) sequential = true; - else if (parallel) - sequential = false; symbol_conf.priv_size = sizeof(int); symbol_conf.try_vmlinux_path = true; + if (symbol__init(NULL) < 0) return -1; @@ -588,5 +769,8 @@ int cmd_test(int argc, const char **argv) */ rlimit__bump_memlock(); - return __cmd_test(argc, argv, skiplist); + suites = build_suites(); + ret = __cmd_test(suites, argc, argv, skiplist); + free(suites); + return ret; } diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c index 44d1be303b67..93c94408bdc8 100644 --- a/tools/perf/tests/demangle-java-test.c +++ b/tools/perf/tests/demangle-java-test.c @@ -2,6 +2,7 @@ #include <string.h> #include <stdlib.h> #include <stdio.h> +#include <linux/kernel.h> #include "tests.h" #include "session.h" #include "debug.h" @@ -28,7 +29,7 @@ static int test__demangle_java(struct test_suite *test __maybe_unused, int subte "void java.lang.Object<init>()" }, }; - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { buf = java_demangle_sym(test_cases[i].mangled, 0); if (strcmp(buf, test_cases[i].demangled)) { pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled, diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index e155f0e0e04d..deefe5003bfc 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -126,6 +126,7 @@ static int attach__cpu_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; err = evsel__open_per_cpu(evsel, cpus, -1); + perf_cpu_map__put(cpus); if (err) { if (err == -EACCES) return TEST_SKIP; @@ -134,7 +135,6 @@ static int attach__cpu_disabled(struct evlist *evlist) return err; } - perf_cpu_map__put(cpus); return evsel__enable(evsel); } @@ -153,10 +153,10 @@ static int attach__cpu_enabled(struct evlist *evlist) } err = evsel__open_per_cpu(evsel, cpus, -1); + perf_cpu_map__put(cpus); if (err == -EACCES) return TEST_SKIP; - perf_cpu_map__put(cpus); return err ? TEST_FAIL : TEST_OK; } @@ -188,6 +188,7 @@ static int test_times(int (attach)(struct evlist *), err = attach(evlist); if (err == TEST_SKIP) { pr_debug(" SKIP : not enough rights\n"); + evlist__delete(evlist); return err; } diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index cf4da3d748c2..226196fb9677 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include "evsel.h" #include "tests.h" #include "debug.h" @@ -36,33 +36,33 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse int subtest __maybe_unused) { struct evsel *evsel = evsel__newtp("sched", "sched_switch"); - int ret = 0; + int ret = TEST_OK; if (IS_ERR(evsel)) { pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); - return -1; + return PTR_ERR(evsel) == -EACCES ? TEST_SKIP : TEST_FAIL; } if (evsel__test_field(evsel, "prev_comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_prio", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_state", sizeof(long), true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_prio", 4, true)) - ret = -1; + ret = TEST_FAIL; evsel__delete(evsel); @@ -70,23 +70,33 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse if (IS_ERR(evsel)) { pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); - return -1; + return TEST_FAIL; } if (evsel__test_field(evsel, "comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prio", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "target_cpu", 4, true)) - ret = -1; + ret = TEST_FAIL; evsel__delete(evsel); return ret; } -DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test); +static struct test_case tests__perf_evsel__tp_sched_test[] = { + TEST_CASE_REASON("Parse sched tracepoints fields", + perf_evsel__tp_sched_test, + "permissions"), + { .name = NULL, } +}; + +struct test_suite suite__perf_evsel__tp_sched_test = { + .desc = "Parse sched tracepoints fields", + .test_cases = tests__perf_evsel__tp_sched_test, +}; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index e3aa9d4fcf3a..726cf8d4da28 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -6,6 +6,7 @@ #include "util/header.h" #include "util/smt.h" #include "tests.h" +#include <perf/cpumap.h> #include <math.h> #include <stdlib.h> #include <string.h> @@ -74,14 +75,12 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u double val, num_cpus_online, num_cpus, num_cores, num_dies, num_packages; int ret; struct expr_parse_ctx *ctx; - bool is_intel = false; char strcmp_cpuid_buf[256]; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); char *escaped_cpuid1, *escaped_cpuid2; TEST_ASSERT_VAL("get_cpuid", cpuid); - is_intel = strstr(cpuid, "Intel") != NULL; TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0); @@ -244,12 +243,19 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u if (num_dies) // Some platforms do not have CPU die support, for example s390 TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages); - TEST_ASSERT_VAL("#system_tsc_freq", expr__parse(&val, ctx, "#system_tsc_freq") == 0); - if (is_intel) - TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0); - else - TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO); + if (expr__parse(&val, ctx, "#system_tsc_freq") == 0) { + bool is_intel = strstr(cpuid, "Intel") != NULL; + + if (is_intel) + TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0); + else + TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO); + } else { +#if defined(__i386__) || defined(__x86_64__) + TEST_ASSERT_VAL("#system_tsc_freq unsupported", 0); +#endif + } /* * Source count returns the number of events aggregating in a leader * event including the leader. Check parsing yields an id. diff --git a/tools/perf/tests/hwmon_pmu.c b/tools/perf/tests/hwmon_pmu.c new file mode 100644 index 000000000000..d2b066a2b557 --- /dev/null +++ b/tools/perf/tests/hwmon_pmu.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "debug.h" +#include "evlist.h" +#include "hwmon_pmu.h" +#include "parse-events.h" +#include "tests.h" +#include <fcntl.h> +#include <sys/stat.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/string.h> + +static const struct test_event { + const char *name; + const char *alias; + long config; +} test_events[] = { + { + "temp_test_hwmon_event1", + "temp1", + 0xA0001, + }, + { + "temp_test_hwmon_event2", + "temp2", + 0xA0002, + }, +}; + +/* Cleanup test PMU directory. */ +static int test_pmu_put(const char *dir, struct perf_pmu *hwm) +{ + char buf[PATH_MAX + 20]; + int ret; + + if (scnprintf(buf, sizeof(buf), "rm -fr %s", dir) < 0) { + pr_err("Failure to set up buffer for \"%s\"\n", dir); + return -EINVAL; + } + ret = system(buf); + if (ret) + pr_err("Failure to \"%s\"\n", buf); + + list_del(&hwm->list); + perf_pmu__delete(hwm); + return ret; +} + +/* + * Prepare test PMU directory data, normally exported by kernel at + * /sys/class/hwmon/hwmon<number>/. Give as input a buffer to hold the file + * path, the result is PMU loaded using that directory. + */ +static struct perf_pmu *test_pmu_get(char *dir, size_t sz) +{ + const char *test_hwmon_name_nl = "A test hwmon PMU\n"; + const char *test_hwmon_name = "A test hwmon PMU"; + /* Simulated hwmon items. */ + const struct test_item { + const char *name; + const char *value; + } test_items[] = { + { "temp1_label", "test hwmon event1\n", }, + { "temp1_input", "40000\n", }, + { "temp2_label", "test hwmon event2\n", }, + { "temp2_input", "50000\n", }, + }; + int hwmon_dirfd = -1, test_dirfd = -1, file; + struct perf_pmu *hwm = NULL; + ssize_t len; + + /* Create equivalent of sysfs mount point. */ + scnprintf(dir, sz, "/tmp/perf-hwmon-pmu-test-XXXXXX"); + if (!mkdtemp(dir)) { + pr_err("mkdtemp failed\n"); + dir[0] = '\0'; + return NULL; + } + test_dirfd = open(dir, O_PATH|O_DIRECTORY); + if (test_dirfd < 0) { + pr_err("Failed to open test directory \"%s\"\n", dir); + goto err_out; + } + + /* Create the test hwmon directory and give it a name. */ + if (mkdirat(test_dirfd, "hwmon1234", 0755) < 0) { + pr_err("Failed to mkdir hwmon directory\n"); + goto err_out; + } + hwmon_dirfd = openat(test_dirfd, "hwmon1234", O_DIRECTORY); + if (hwmon_dirfd < 0) { + pr_err("Failed to open test hwmon directory \"%s/hwmon1234\"\n", dir); + goto err_out; + } + file = openat(hwmon_dirfd, "name", O_WRONLY | O_CREAT, 0600); + if (file < 0) { + pr_err("Failed to open for writing file \"name\"\n"); + goto err_out; + } + len = strlen(test_hwmon_name_nl); + if (write(file, test_hwmon_name_nl, len) < len) { + close(file); + pr_err("Failed to write to 'name' file\n"); + goto err_out; + } + close(file); + + /* Create test hwmon files. */ + for (size_t i = 0; i < ARRAY_SIZE(test_items); i++) { + const struct test_item *item = &test_items[i]; + + file = openat(hwmon_dirfd, item->name, O_WRONLY | O_CREAT, 0600); + if (file < 0) { + pr_err("Failed to open for writing file \"%s\"\n", item->name); + goto err_out; + } + + if (write(file, item->value, strlen(item->value)) < 0) { + pr_err("Failed to write to file \"%s\"\n", item->name); + close(file); + goto err_out; + } + close(file); + } + + /* Make the PMU reading the files created above. */ + hwm = perf_pmus__add_test_hwmon_pmu(hwmon_dirfd, "hwmon1234", test_hwmon_name); + if (!hwm) + pr_err("Test hwmon creation failed\n"); + +err_out: + if (!hwm) { + test_pmu_put(dir, hwm); + if (hwmon_dirfd >= 0) + close(hwmon_dirfd); + } + if (test_dirfd >= 0) + close(test_dirfd); + return hwm; +} + +static int do_test(size_t i, bool with_pmu, bool with_alias) +{ + const char *test_event = with_alias ? test_events[i].alias : test_events[i].name; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + struct parse_events_error err; + int ret; + char str[128]; + bool found = false; + + if (!evlist) { + pr_err("evlist allocation failed\n"); + return TEST_FAIL; + } + + if (with_pmu) + snprintf(str, sizeof(str), "hwmon_a_test_hwmon_pmu/%s/", test_event); + else + strlcpy(str, test_event, sizeof(str)); + + pr_debug("Testing '%s'\n", str); + parse_events_error__init(&err); + ret = parse_events(evlist, str, &err); + if (ret) { + pr_debug("FAILED %s:%d failed to parse event '%s', err %d\n", + __FILE__, __LINE__, str, ret); + parse_events_error__print(&err, str); + ret = TEST_FAIL; + goto out; + } + + ret = TEST_OK; + if (with_pmu ? (evlist->core.nr_entries != 1) : (evlist->core.nr_entries < 1)) { + pr_debug("FAILED %s:%d Unexpected number of events for '%s' of %d\n", + __FILE__, __LINE__, str, evlist->core.nr_entries); + ret = TEST_FAIL; + goto out; + } + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->pmu || !evsel->pmu->name || + strcmp(evsel->pmu->name, "hwmon_a_test_hwmon_pmu")) + continue; + + if (evsel->core.attr.config != (u64)test_events[i].config) { + pr_debug("FAILED %s:%d Unexpected config for '%s', %lld != %ld\n", + __FILE__, __LINE__, str, + evsel->core.attr.config, + test_events[i].config); + ret = TEST_FAIL; + goto out; + } + found = true; + } + + if (!found) { + pr_debug("FAILED %s:%d Didn't find hwmon event '%s' in parsed evsels\n", + __FILE__, __LINE__, str); + ret = TEST_FAIL; + } + +out: + parse_events_error__exit(&err); + evlist__delete(evlist); + return ret; +} + +static int test__hwmon_pmu(bool with_pmu) +{ + char dir[PATH_MAX]; + struct perf_pmu *pmu = test_pmu_get(dir, sizeof(dir)); + int ret = TEST_OK; + + if (!pmu) + return TEST_FAIL; + + for (size_t i = 0; i < ARRAY_SIZE(test_events); i++) { + ret = do_test(i, with_pmu, /*with_alias=*/false); + + if (ret != TEST_OK) + break; + + ret = do_test(i, with_pmu, /*with_alias=*/true); + + if (ret != TEST_OK) + break; + } + test_pmu_put(dir, pmu); + return ret; +} + +static int test__hwmon_pmu_without_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test__hwmon_pmu(/*with_pmu=*/false); +} + +static int test__hwmon_pmu_with_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test__hwmon_pmu(/*with_pmu=*/true); +} + +static int test__parse_hwmon_filename(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + const struct hwmon_parse_test { + const char *filename; + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + bool parse_ok; + } tests[] = { + { + .filename = "cpu0_accuracy", + .type = HWMON_TYPE_CPU, + .number = 0, + .item = HWMON_ITEM_ACCURACY, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "temp1_input", + .type = HWMON_TYPE_TEMP, + .number = 1, + .item = HWMON_ITEM_INPUT, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "fan2_vid", + .type = HWMON_TYPE_FAN, + .number = 2, + .item = HWMON_ITEM_VID, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "power3_crit_alarm", + .type = HWMON_TYPE_POWER, + .number = 3, + .item = HWMON_ITEM_CRIT, + .alarm = true, + .parse_ok = true, + }, + { + .filename = "intrusion4_average_interval_min_alarm", + .type = HWMON_TYPE_INTRUSION, + .number = 4, + .item = HWMON_ITEM_AVERAGE_INTERVAL_MIN, + .alarm = true, + .parse_ok = true, + }, + { + .filename = "badtype5_baditem", + .type = HWMON_TYPE_NONE, + .number = 5, + .item = HWMON_ITEM_NONE, + .alarm = false, + .parse_ok = false, + }, + { + .filename = "humidity6_baditem", + .type = HWMON_TYPE_NONE, + .number = 6, + .item = HWMON_ITEM_NONE, + .alarm = false, + .parse_ok = false, + }, + }; + + for (size_t i = 0; i < ARRAY_SIZE(tests); i++) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + + TEST_ASSERT_EQUAL("parse_hwmon_filename", + parse_hwmon_filename( + tests[i].filename, + &type, + &number, + &item, + &alarm), + tests[i].parse_ok + ); + if (tests[i].parse_ok) { + TEST_ASSERT_EQUAL("parse_hwmon_filename type", type, tests[i].type); + TEST_ASSERT_EQUAL("parse_hwmon_filename number", number, tests[i].number); + TEST_ASSERT_EQUAL("parse_hwmon_filename item", item, tests[i].item); + TEST_ASSERT_EQUAL("parse_hwmon_filename alarm", alarm, tests[i].alarm); + } + } + return TEST_OK; +} + +static struct test_case tests__hwmon_pmu[] = { + TEST_CASE("Basic parsing test", parse_hwmon_filename), + TEST_CASE("Parsing without PMU name", hwmon_pmu_without_pmu), + TEST_CASE("Parsing with PMU name", hwmon_pmu_with_pmu), + { .name = NULL, } +}; + +struct test_suite suite__hwmon_pmu = { + .desc = "Hwmon PMU", + .test_cases = tests__hwmon_pmu, +}; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index a5040772043f..a7fcbd589752 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -81,7 +81,7 @@ make_no_gtk2 := NO_GTK2=1 make_no_ui := NO_SLANG=1 NO_GTK2=1 make_no_demangle := NO_DEMANGLE=1 make_no_libelf := NO_LIBELF=1 -make_no_libunwind := NO_LIBUNWIND=1 +make_libunwind := LIBUNWIND=1 make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1 make_no_backtrace := NO_BACKTRACE=1 make_no_libcapstone := NO_CAPSTONE=1 @@ -121,7 +121,7 @@ make_static := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX3 # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1 -make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 +make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 @@ -153,7 +153,7 @@ run += make_no_gtk2 run += make_no_ui run += make_no_demangle run += make_no_libelf -run += make_no_libunwind +run += make_libunwind run += make_no_libdw_dwarf_unwind run += make_no_backtrace run += make_no_libcapstone diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 888df8eca981..3943da441979 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -40,7 +40,7 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused int flags = O_RDONLY | O_DIRECTORY; struct evlist *evlist = evlist__new(); struct evsel *evsel; - int err = -1, i, nr_events = 0, nr_polls = 0; + int ret = TEST_FAIL, err, i, nr_events = 0, nr_polls = 0; char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) { @@ -51,6 +51,7 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { pr_debug("%s: evsel__newtp\n", __func__); + ret = PTR_ERR(evsel) == -EACCES ? TEST_SKIP : TEST_FAIL; goto out_delete_evlist; } @@ -138,11 +139,21 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused } } out_ok: - err = 0; + ret = TEST_OK; out_delete_evlist: evlist__delete(evlist); out: - return err; + return ret; } -DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields); +static struct test_case tests__syscall_openat_tp_fields[] = { + TEST_CASE_REASON("syscalls:sys_enter_openat event fields", + syscall_openat_tp_fields, + "permissions"), + { .name = NULL, } +}; + +struct test_suite suite__syscall_openat_tp_fields = { + .desc = "syscalls:sys_enter_openat event fields", + .test_cases = tests__syscall_openat_tp_fields, +}; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 9e3086d02150..82a19674a38f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -730,7 +730,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || - strcmp(evsel->pmu_name, "cpu")); + strcmp(evsel->pmu->name, "cpu")); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", @@ -898,8 +898,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -932,7 +931,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -947,7 +946,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); if (evsel__has_leader(evsel, leader)) @@ -1016,9 +1015,8 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ TEST_ASSERT_VAL("wrong exclude guest", - evsel->core.attr.exclude_guest); + !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", @@ -1072,7 +1070,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1103,8 +1101,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1122,8 +1119,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1222,7 +1218,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1437,7 +1433,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1453,7 +1449,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1468,7 +1464,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1497,7 +1493,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1513,7 +1509,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index be18506f6a24..6a681e3fb552 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -176,7 +176,8 @@ static int test__pmu_format(struct test_suite *test __maybe_unused, int subtest } memset(&attr, 0, sizeof(attr)); - ret = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/false, /*err=*/NULL); + ret = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/false, + /*apply_hardcoded=*/false, /*err=*/NULL); if (ret) { pr_err("perf_pmu__config_terms failed"); goto err_out; diff --git a/tools/perf/tests/shell/annotate.sh b/tools/perf/tests/shell/annotate.sh index 2ccf4f1d46b6..1590a37363de 100755 --- a/tools/perf/tests/shell/annotate.sh +++ b/tools/perf/tests/shell/annotate.sh @@ -44,7 +44,7 @@ test_basic() { fi # Generate the annotated output file - perf annotate -i "${perfdata}" --stdio 2> /dev/null > "${perfout}" + perf annotate --no-demangle -i "${perfdata}" --stdio 2> /dev/null | head -250 > "${perfout}" # check if it has the target symbol if ! grep "${testsym}" "${perfout}" @@ -63,8 +63,8 @@ test_basic() { fi # check again with a target symbol name - if ! perf annotate -i "${perfdata}" "${testsym}" 2> /dev/null | \ - grep -m 3 "${disasm_regex}" + if ! perf annotate --no-demangle -i "${perfdata}" "${testsym}" 2> /dev/null | \ + head -250 | grep -m 3 "${disasm_regex}" then echo "Basic annotate [Failed: missing disasm output when specifying the target symbol]" err=1 @@ -72,8 +72,8 @@ test_basic() { fi # check one more with external objdump tool (forced by --objdump option) - if ! perf annotate -i "${perfdata}" --objdump=objdump 2> /dev/null | \ - grep -m 3 "${disasm_regex}" + if ! perf annotate --no-demangle -i "${perfdata}" --objdump=objdump 2> /dev/null | \ + head -250 | grep -m 3 "${disasm_regex}" then echo "Basic annotate [Failed: missing disasm output from non default disassembler (using --objdump)]" err=1 diff --git a/tools/perf/tests/shell/attr.sh b/tools/perf/tests/shell/attr.sh new file mode 100755 index 000000000000..5a4e43b2471d --- /dev/null +++ b/tools/perf/tests/shell/attr.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Perf attribute expectations test +# SPDX-License-Identifier: GPL-2.0 + +err=0 + +cleanup() { + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +shelldir=$(dirname "$0") +perf_path=$(which perf) +python "${shelldir}"/lib/attr.py -d "${shelldir}"/attr -v -p "$perf_path" +cleanup +exit $err diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/shell/attr/README index 4066fec7180a..67c4ca76b85d 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/shell/attr/README @@ -51,6 +51,8 @@ Following tests are defined (with perf commands): perf record --call-graph fp kill (test-record-graph-fp-aarch64) perf record -e '{cycles,instructions}' kill (test-record-group1) perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) + perf record -e '{cycles,cache-misses}:S' kill (test-record-group-sampling1) + perf record -c 10000 -e '{cycles,cache-misses}:S' kill (test-record-group-sampling2) perf record -D kill (test-record-no-delay) perf record -i kill (test-record-no-inherit) perf record -n kill (test-record-no-samples) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/shell/attr/base-record index b44e4e6e4443..b44e4e6e4443 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/shell/attr/base-record diff --git a/tools/perf/tests/attr/base-record-spe b/tools/perf/tests/shell/attr/base-record-spe index 08fa96b59240..08fa96b59240 100644 --- a/tools/perf/tests/attr/base-record-spe +++ b/tools/perf/tests/shell/attr/base-record-spe diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/shell/attr/base-stat index fccd8ec4d1b0..fccd8ec4d1b0 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/shell/attr/base-stat diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/shell/attr/system-wide-dummy index a1e1d6a263bf..a1e1d6a263bf 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/shell/attr/system-wide-dummy diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/shell/attr/test-record-C0 index 198e8429a1bf..1049ac8b52f2 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/shell/attr/test-record-C0 @@ -18,5 +18,7 @@ sample_type=65927 mmap=0 comm=0 task=0 +inherit=0 [event:system-wide-dummy] +inherit=0 diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/shell/attr/test-record-basic index b0ca42a5ecc9..b0ca42a5ecc9 100644 --- a/tools/perf/tests/attr/test-record-basic +++ b/tools/perf/tests/shell/attr/test-record-basic diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/shell/attr/test-record-branch-any index 1a99b3ce6b89..1a99b3ce6b89 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/shell/attr/test-record-branch-any diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/shell/attr/test-record-branch-filter-any index 709768b508c6..709768b508c6 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/shell/attr/test-record-branch-filter-any_call index f943221f7825..f943221f7825 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any_call diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/shell/attr/test-record-branch-filter-any_ret index fd4f5b4154a9..fd4f5b4154a9 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any_ret diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/shell/attr/test-record-branch-filter-hv index 4e52d685ebe1..4e52d685ebe1 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-hv diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/shell/attr/test-record-branch-filter-ind_call index e08c6ab3796e..e08c6ab3796e 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-ind_call diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/shell/attr/test-record-branch-filter-k index b4b98f84fc2f..b4b98f84fc2f 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-k diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/shell/attr/test-record-branch-filter-u index fb9610edbb0d..fb9610edbb0d 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-u diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/shell/attr/test-record-count index 5e9b9019d786..5e9b9019d786 100644 --- a/tools/perf/tests/attr/test-record-count +++ b/tools/perf/tests/shell/attr/test-record-count diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/shell/attr/test-record-data index a99bb13149c2..a99bb13149c2 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/shell/attr/test-record-data diff --git a/tools/perf/tests/attr/test-record-dummy-C0 b/tools/perf/tests/shell/attr/test-record-dummy-C0 index 576ec48b3aaf..91499405fff4 100644 --- a/tools/perf/tests/attr/test-record-dummy-C0 +++ b/tools/perf/tests/shell/attr/test-record-dummy-C0 @@ -19,7 +19,7 @@ sample_period=4000 sample_type=391 read_format=4|20 disabled=0 -inherit=1 +inherit=0 pinned=0 exclusive=0 exclude_user=0 @@ -37,7 +37,7 @@ precise_ip=0 mmap_data=0 sample_id_all=1 exclude_host=0 -exclude_guest=1 +exclude_guest=0 exclude_callchain_kernel=0 exclude_callchain_user=0 mmap2=1 diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/shell/attr/test-record-freq index 89e29f6b2ae0..89e29f6b2ae0 100644 --- a/tools/perf/tests/attr/test-record-freq +++ b/tools/perf/tests/shell/attr/test-record-freq diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/shell/attr/test-record-graph-default index f0a18b4ea4f5..f0a18b4ea4f5 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/shell/attr/test-record-graph-default diff --git a/tools/perf/tests/attr/test-record-graph-default-aarch64 b/tools/perf/tests/shell/attr/test-record-graph-default-aarch64 index e98d62efb6f7..e98d62efb6f7 100644 --- a/tools/perf/tests/attr/test-record-graph-default-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-graph-default-aarch64 diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/shell/attr/test-record-graph-dwarf index ae92061d611d..ae92061d611d 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/shell/attr/test-record-graph-dwarf diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/shell/attr/test-record-graph-fp index a6e60e839205..a6e60e839205 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/shell/attr/test-record-graph-fp diff --git a/tools/perf/tests/attr/test-record-graph-fp-aarch64 b/tools/perf/tests/shell/attr/test-record-graph-fp-aarch64 index cbeea9971285..cbeea9971285 100644 --- a/tools/perf/tests/attr/test-record-graph-fp-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-graph-fp-aarch64 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/shell/attr/test-record-group-sampling index 97e7e64a38f0..86a940d7895d 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/shell/attr/test-record-group-sampling @@ -2,6 +2,7 @@ command = record args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 ret = 1 +kernel_until = 6.12 [event-1:base-record] fd=1 @@ -18,7 +19,7 @@ group_fd=1 type=0 config=3 -# default | PERF_SAMPLE_READ +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD sample_type=343 # PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST diff --git a/tools/perf/tests/shell/attr/test-record-group-sampling1 b/tools/perf/tests/shell/attr/test-record-group-sampling1 new file mode 100644 index 000000000000..4748ab7bf684 --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group-sampling1 @@ -0,0 +1,50 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 + +# cycles +type=0 +config=0 + +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=1 +mmap=1 +comm=1 +enable_on_exec=1 +disabled=1 + +# inherit is enabled for group sampling +inherit=1 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=0 +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 +freq=0 + +# inherit is enabled for group sampling +inherit=1 diff --git a/tools/perf/tests/shell/attr/test-record-group-sampling2 b/tools/perf/tests/shell/attr/test-record-group-sampling2 new file mode 100644 index 000000000000..e0432244a0eb --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group-sampling2 @@ -0,0 +1,61 @@ +[config] +command = record +args = --no-bpf-event -c 10000 -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 + +# cycles +type=0 +config=0 + +# default | PERF_SAMPLE_READ +sample_type=87 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=1 +mmap=1 +comm=1 +enable_on_exec=1 +disabled=1 + +# inherit is enabled for group sampling +inherit=1 + +# sampling disabled +sample_freq=0 +sample_period=10000 +freq=0 +write_backward=0 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ +sample_type=87 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=0 +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 + +# inherit is enabled for group sampling +inherit=1 + +# sampling disabled +sample_freq=0 +sample_period=0 +freq=0 +write_backward=0 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/shell/attr/test-record-group1 index eeb1db392bc9..eeb1db392bc9 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/shell/attr/test-record-group1 diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/shell/attr/test-record-group2 index cebdaa8e64e4..891d41a7bddf 100644 --- a/tools/perf/tests/attr/test-record-group2 +++ b/tools/perf/tests/shell/attr/test-record-group2 @@ -2,6 +2,7 @@ command = record args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 ret = 1 +kernel_until = 6.12 [event-1:base-record] fd=1 diff --git a/tools/perf/tests/shell/attr/test-record-group3 b/tools/perf/tests/shell/attr/test-record-group3 new file mode 100644 index 000000000000..249be884959e --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group3 @@ -0,0 +1,31 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 +config=0|1 +sample_period=1234000 +sample_type=87 +read_format=28|31 +disabled=1 +inherit=1 +freq=0 + +[event-2:base-record] +fd=2 +group_fd=1 +config=0|1 +sample_period=6789000 +sample_type=87 +read_format=28|31 +disabled=0 +inherit=1 +mmap=0 +comm=0 +freq=0 +enable_on_exec=0 +task=0 diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/shell/attr/test-record-no-buffering index 583dcbb078ba..583dcbb078ba 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ b/tools/perf/tests/shell/attr/test-record-no-buffering diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/shell/attr/test-record-no-inherit index 15d1dc162e1c..15d1dc162e1c 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/shell/attr/test-record-no-inherit diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/shell/attr/test-record-no-samples index 596fbd6d5a2c..596fbd6d5a2c 100644 --- a/tools/perf/tests/attr/test-record-no-samples +++ b/tools/perf/tests/shell/attr/test-record-no-samples diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/shell/attr/test-record-period index 119101154c5e..119101154c5e 100644 --- a/tools/perf/tests/attr/test-record-period +++ b/tools/perf/tests/shell/attr/test-record-period diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/shell/attr/test-record-pfm-period index 368f5b814094..368f5b814094 100644 --- a/tools/perf/tests/attr/test-record-pfm-period +++ b/tools/perf/tests/shell/attr/test-record-pfm-period diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/shell/attr/test-record-raw index 13a5f7860c78..13a5f7860c78 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/shell/attr/test-record-raw diff --git a/tools/perf/tests/attr/test-record-spe-period b/tools/perf/tests/shell/attr/test-record-spe-period index 75f8c9cd8e3f..75f8c9cd8e3f 100644 --- a/tools/perf/tests/attr/test-record-spe-period +++ b/tools/perf/tests/shell/attr/test-record-spe-period diff --git a/tools/perf/tests/attr/test-record-spe-period-term b/tools/perf/tests/shell/attr/test-record-spe-period-term index 8f60a4fec657..8f60a4fec657 100644 --- a/tools/perf/tests/attr/test-record-spe-period-term +++ b/tools/perf/tests/shell/attr/test-record-spe-period-term diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/shell/attr/test-record-spe-physical-address index 7ebcf5012ce3..7ebcf5012ce3 100644 --- a/tools/perf/tests/attr/test-record-spe-physical-address +++ b/tools/perf/tests/shell/attr/test-record-spe-physical-address diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-no-sve-aarch64 index bed765450ca9..bed765450ca9 100644 --- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-no-sve-aarch64 diff --git a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-old-sve-aarch64 index 15ebfc3418e3..15ebfc3418e3 100644 --- a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-old-sve-aarch64 diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-sve-aarch64 index a65113cd7311..a65113cd7311 100644 --- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-sve-aarch64 diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/shell/attr/test-stat-C0 index a2c76d10b2bb..a2c76d10b2bb 100644 --- a/tools/perf/tests/attr/test-stat-C0 +++ b/tools/perf/tests/shell/attr/test-stat-C0 diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/shell/attr/test-stat-basic index 69867d049fda..69867d049fda 100644 --- a/tools/perf/tests/attr/test-stat-basic +++ b/tools/perf/tests/shell/attr/test-stat-basic diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/shell/attr/test-stat-default index a1e2da0a9a6d..e47fb4944679 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/shell/attr/test-stat-default @@ -88,98 +88,142 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/shell/attr/test-stat-detailed-1 index 1c52cb05c900..3d500d3e0c5c 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-1 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/shell/attr/test-stat-detailed-2 index 7e961d24a885..01777a63752f 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-2 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 @@ -230,8 +274,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event29:base-stat] -fd=29 +[event33:base-stat] +fd=33 type=3 config=1 optional=1 @@ -240,8 +284,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event30:base-stat] -fd=30 +[event34:base-stat] +fd=34 type=3 config=65537 optional=1 @@ -250,8 +294,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event31:base-stat] -fd=31 +[event35:base-stat] +fd=35 type=3 config=3 optional=1 @@ -260,8 +304,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event32:base-stat] -fd=32 +[event36:base-stat] +fd=36 type=3 config=65539 optional=1 @@ -270,8 +314,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event33:base-stat] -fd=33 +[event37:base-stat] +fd=37 type=3 config=4 optional=1 @@ -280,8 +324,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event34:base-stat] -fd=34 +[event38:base-stat] +fd=38 type=3 config=65540 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/shell/attr/test-stat-detailed-3 index e50535f45977..8400abd7e1e4 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-3 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 @@ -230,8 +274,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event29:base-stat] -fd=29 +[event33:base-stat] +fd=33 type=3 config=1 optional=1 @@ -240,8 +284,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event30:base-stat] -fd=30 +[event34:base-stat] +fd=34 type=3 config=65537 optional=1 @@ -250,8 +294,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event31:base-stat] -fd=31 +[event35:base-stat] +fd=35 type=3 config=3 optional=1 @@ -260,8 +304,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event32:base-stat] -fd=32 +[event36:base-stat] +fd=36 type=3 config=65539 optional=1 @@ -270,8 +314,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event33:base-stat] -fd=33 +[event37:base-stat] +fd=37 type=3 config=4 optional=1 @@ -280,8 +324,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event34:base-stat] -fd=34 +[event38:base-stat] +fd=38 type=3 config=65540 optional=1 @@ -290,8 +334,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event35:base-stat] -fd=35 +[event39:base-stat] +fd=39 type=3 config=512 optional=1 @@ -300,8 +344,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event36:base-stat] -fd=36 +[event40:base-stat] +fd=40 type=3 config=66048 optional=1 diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/shell/attr/test-stat-group1 index 1746751123dc..1746751123dc 100644 --- a/tools/perf/tests/attr/test-stat-group1 +++ b/tools/perf/tests/shell/attr/test-stat-group1 diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/shell/attr/test-stat-no-inherit index 924fbb9300d1..924fbb9300d1 100644 --- a/tools/perf/tests/attr/test-stat-no-inherit +++ b/tools/perf/tests/shell/attr/test-stat-no-inherit diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop.sh b/tools/perf/tests/shell/coresight/asm_pure_loop.sh index 2d65defb7e0f..c63bc8c73e26 100755 --- a/tools/perf/tests/shell/coresight/asm_pure_loop.sh +++ b/tools/perf/tests/shell/coresight/asm_pure_loop.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / ASM Pure Loop +# CoreSight / ASM Pure Loop (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh index ddcc9bb850f5..8e29630957c8 100755 --- a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh +++ b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Memcpy 16k 10 Threads +# CoreSight / Memcpy 16k 10 Threads (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh index 2ce5e139b2fd..0c4c82a1c8e1 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Thread Loop 10 Threads - Check TID +# CoreSight / Thread Loop 10 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh index 3ad9498753d7..d3aea9fc6ced 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Thread Loop 2 Threads - Check TID +# CoreSight / Thread Loop 2 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh index 4fbb4a29aad3..7429d3a2ae43 100755 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Unroll Loop Thread 10 +# CoreSight / Unroll Loop Thread 10 (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh index a6ee740f0d7e..2df05052c324 100755 --- a/tools/perf/tests/shell/ftrace.sh +++ b/tools/perf/tests/shell/ftrace.sh @@ -67,12 +67,12 @@ test_ftrace_latency() { test_ftrace_profile() { echo "perf ftrace profile test" - perf ftrace profile sleep 0.1 > "${output}" + perf ftrace profile -m 16M sleep 0.1 > "${output}" grep ^# "${output}" grep sleep "${output}" grep schedule "${output}" grep execve "${output}" - time_re="[[:space:]]+10[[:digit:]]{4}\.[[:digit:]]{3}" + time_re="[[:space:]]+1[[:digit:]]{5}\.[[:digit:]]{3}" # 100283.000 100283.000 100283.000 1 __x64_sys_clock_nanosleep # Check for one *clock_nanosleep line with a Count of just 1 that takes a bit more than 0.1 seconds # Strip the _x64_sys part to work with other architectures diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/shell/lib/attr.py index e890c261ad26..3db9a7d78715 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/shell/lib/attr.py @@ -246,6 +246,23 @@ class Test(object): return False return True + def restore_sample_rate(self, value=10000): + try: + # Check value of sample_rate + with open("/proc/sys/kernel/perf_event_max_sample_rate", "r") as fIn: + curr_value = fIn.readline() + # If too low restore to reasonable value + if not curr_value or int(curr_value) < int(value): + with open("/proc/sys/kernel/perf_event_max_sample_rate", "w") as fOut: + fOut.write(str(value)) + + except IOError as e: + log.warning("couldn't restore sample_rate value: I/O error %s" % e) + except ValueError as e: + log.warning("couldn't restore sample_rate value: Value error %s" % e) + except TypeError as e: + log.warning("couldn't restore sample_rate value: Type error %s" % e) + def load_events(self, path, events): parser_event = configparser.ConfigParser() parser_event.read(path) @@ -283,6 +300,7 @@ class Test(object): if self.skip_test_kernel_until(): raise Notest(self, "new kernel skip") + self.restore_sample_rate() cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir, self.perf, self.command, tempdir, self.args) ret = os.WEXITSTATUS(os.system(cmd)) diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh index 11ed2c25ed91..184d62e7e5bd 100644 --- a/tools/perf/tests/shell/lib/coresight.sh +++ b/tools/perf/tests/shell/lib/coresight.sh @@ -18,7 +18,7 @@ BIN="$DIR/$TEST" # If the test tool/binary does not exist and is executable then skip the test if ! test -x "$BIN"; then exit 2; fi # If CoreSight is not available, skip the test -perf list cs_etm | grep -q cs_etm || exit 2 +perf list pmu | grep -q cs_etm || exit 2 DATD="." # If the data dir env is set then make the data dir use that instead of ./ if test -n "$PERF_TEST_CORESIGHT_DATADIR"; then diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py index abc1fd737782..8ddb85586131 100644 --- a/tools/perf/tests/shell/lib/perf_json_output_lint.py +++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py @@ -57,6 +57,7 @@ def check_json_output(expected_items): 'interval': lambda x: isfloat(x), 'metric-unit': lambda x: True, 'metric-value': lambda x: isfloat(x), + 'metric-threshold': lambda x: x in ['unknown', 'good', 'less good', 'nearly bad', 'bad'], 'metricgroup': lambda x: True, 'node': lambda x: True, 'pcnt-running': lambda x: isfloat(x), @@ -68,13 +69,15 @@ def check_json_output(expected_items): for item in json.loads(input): if expected_items != -1: count = len(item) - if count != expected_items and count >= 1 and count <= 6 and 'metric-value' in item: + if count != expected_items and count >= 1 and count <= 7 and 'metric-value' in item: # Events that generate >1 metric may have isolated metric # values and possibly other prefixes like interval, core, # aggregate-number, or event-runtime/pcnt-running from multiplexing. pass elif count != expected_items and count >= 1 and count <= 5 and 'metricgroup' in item: pass + elif count == expected_items + 1 and 'metric-threshold' in item: + pass elif count != expected_items: raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}' f' in \'{item}\'') diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh index 8a868ae64560..76a9846cff22 100755 --- a/tools/perf/tests/shell/list.sh +++ b/tools/perf/tests/shell/list.sh @@ -24,8 +24,11 @@ trap trap_cleanup EXIT TERM INT test_list_json() { echo "Json output test" + # Generate perf list json output into list_output file. perf list -j -o "${list_output}" - $PYTHON -m json.tool "${list_output}" + # Validate the json using python, redirect the json copy to /dev/null as + # otherwise the test may block writing to stdout. + $PYTHON -m json.tool "${list_output}" /dev/null echo "Json output test [Success]" } diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index c1ec5762215b..30d195d4c62f 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -27,7 +27,7 @@ check() { exit fi - if ! perf list | grep -q lock:contention_begin; then + if ! perf list tracepoint | grep -q lock:contention_begin; then echo "[Skip] No lock contention tracepoints" err=2 exit diff --git a/tools/perf/tests/shell/perftool-testsuite_report.sh b/tools/perf/tests/shell/perftool-testsuite_report.sh index 973012ce92a7..a8cf75b4e77e 100755 --- a/tools/perf/tests/shell/perftool-testsuite_report.sh +++ b/tools/perf/tests/shell/perftool-testsuite_report.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perftool-testsuite_report +# perftool-testsuite_report (exclusive) # SPDX-License-Identifier: GPL-2.0 test -d "$(dirname "$0")/base_report" || exit 2 diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index d4c8005ce9b9..e459aa99a951 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -13,6 +13,7 @@ skip_test_missing_symbol ${sym} data=$(mktemp /tmp/perf.data.XXXXXX) data2=$(mktemp /tmp/perf.data2.XXXXXX) prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" err=0 set -e diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 554e12e83c55..0c5aacc446b3 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Add vfs_getname probe to get syscall args filenames +# Add vfs_getname probe to get syscall args filenames (exclusive) # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index f38c8ead0b03..47a26f25db9f 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -40,8 +40,8 @@ trace_libc_inet_pton_backtrace() { case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' - echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected - echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected + echo "((__GI_)?getaddrinfo|text_to_binary_address)\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "(gaih_inet|main)\+0x[[:xdigit:]]+[[:space:]]\(inlined|.*/bin/ping.*\)$" >> $expected ;; ppc64|ppc64le) eventattr='max-stack=4' diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 9a61928e3c9a..5940fdc1df37 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Use vfs_getname probe to get syscall args filenames +# Use vfs_getname probe to get syscall args filenames (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system # then use it with 'perf record' using 'touch' to write to a temp file, then diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 048078ee2eca..0fc7a909ae9b 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perf record tests +# perf record tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e @@ -17,6 +17,7 @@ skip_test_missing_symbol ${testsym} err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +script_output=$(mktemp /tmp/__perf_test.perf.data.XXXXX.script) testprog="perf test -w thloop" cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" br_cntr_file="/caps/branch_counter_nr" @@ -93,7 +94,7 @@ test_per_thread() { test_register_capture() { echo "Register capture test" - if ! perf list | grep -q 'br_inst_retired.near_call' + if ! perf list pmu | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" return @@ -228,6 +229,73 @@ test_cgroup() { echo "Cgroup sampling test [Success]" } +test_leader_sampling() { + echo "Basic leader sampling test" + if ! perf record -o "${perfdata}" -e "{instructions,instructions}:Su" -- \ + perf test -w brstack 2> /dev/null + then + echo "Leader sampling [Failed record]" + err=1 + return + fi + index=0 + perf script -i "${perfdata}" > $script_output + while IFS= read -r line + do + # Check if the two instruction counts are equal in each record + instructions=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="instructions:") print $(i-1)}') + if [ $(($index%2)) -ne 0 ] && [ ${instructions}x != ${prev_instructions}x ] + then + echo "Leader sampling [Failed inconsistent instructions count]" + err=1 + return + fi + index=$(($index+1)) + prev_instructions=$instructions + done < $script_output + echo "Basic leader sampling test [Success]" +} + +test_topdown_leader_sampling() { + echo "Topdown leader sampling test" + if ! perf stat -e "{slots,topdown-retiring}" true 2> /dev/null + then + echo "Topdown leader sampling [Skipped event parsing failed]" + return + fi + if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null + then + echo "Topdown leader sampling [Failed topdown events not reordered correctly]" + err=1 + return + fi + echo "Topdown leader sampling test [Success]" +} + +test_precise_max() { + echo "precise_max attribute test" + if ! perf stat -e "cycles,instructions" true 2> /dev/null + then + echo "precise_max attribute [Skipped no hardware events]" + return + fi + # Just to make sure it doesn't fail + if ! perf record -o "${perfdata}" -e "cycles:P" true 2> /dev/null + then + echo "precise_max attribute [Failed cycles:P event]" + err=1 + return + fi + # On AMD, cycles and instructions events are treated differently + if ! perf record -o "${perfdata}" -e "instructions:P" true 2> /dev/null + then + echo "precise_max attribute [Failed instructions:P event]" + err=1 + return + fi + echo "precise_max attribute test [Success]" +} + # raise the limit of file descriptors to minimum if [[ $default_fd_limit -lt $min_fd_limit ]]; then ulimit -Sn $min_fd_limit @@ -239,6 +307,9 @@ test_system_wide test_workload test_branch_counter test_cgroup +test_leader_sampling +test_topdown_leader_sampling +test_precise_max # restore the default value ulimit -Sn $default_fd_limit diff --git a/tools/perf/tests/shell/record_lbr.sh b/tools/perf/tests/shell/record_lbr.sh index 32314641217e..8d750ee631f8 100755 --- a/tools/perf/tests/shell/record_lbr.sh +++ b/tools/perf/tests/shell/record_lbr.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perf record LBR tests +# perf record LBR tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index 67c925f3a15a..678947fe69ee 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -1,5 +1,5 @@ #!/bin/sh -# perf record offcpu profiling tests +# perf record offcpu profiling tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 3f1e67795490..7a8adf81e4b3 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -73,9 +73,33 @@ test_topdown_groups() { err=1 return fi - if perf stat -e '{topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>" + if perf stat -e 'instructions,topdown-retiring,slots' true 2>&1 | grep -E -q "<not supported>" then - echo "Topdown event group test [Failed slots not reordered first]" + echo "Topdown event group test [Failed slots not reordered first in no-group case]" + err=1 + return + fi + if perf stat -e '{instructions,topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed slots not reordered first in single group case]" + err=1 + return + fi + if perf stat -e '{instructions,slots},topdown-retiring' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed topdown metrics event not move into slots group]" + err=1 + return + fi + if perf stat -e '{instructions,slots},{topdown-retiring}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed topdown metrics group not merge into slots group]" + err=1 + return + fi + if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed raw format slots not reordered first]" err=1 return fi @@ -117,16 +141,18 @@ test_cputype() { # Find a known PMU for cputype. pmu="" - for i in cpu cpu_atom armv8_pmuv3_0 + devs="/sys/bus/event_source/devices" + for i in $devs/cpu $devs/cpu_atom $devs/armv8_pmuv3_0 $devs/armv8_cortex_* do - if test -d "/sys/devices/$i" + i_base=$(basename "$i") + if test -d "$i" then - pmu="$i" + pmu="$i_base" break fi - if perf stat -e "$i/instructions/" true > /dev/null 2>&1 + if perf stat -e "$i_base/instructions/" true > /dev/null 2>&1 then - pmu="$i" + pmu="$i_base" break fi done @@ -146,6 +172,30 @@ test_cputype() { echo "cputype test [Success]" } +test_hybrid() { + # Test the default stat command on hybrid devices opens one cycles event for + # each CPU type. + echo "hybrid test" + + # Count the number of core PMUs, assume minimum of 1 + pmus=$(ls /sys/bus/event_source/devices/*/cpus 2>/dev/null | wc -l) + if [ "$pmus" -lt 1 ] + then + pmus=1 + fi + + # Run default Perf stat + cycles_events=$(perf stat -- true 2>&1 | grep -E "/cycles/[uH]*| cycles[:uH]* " -c) + + if [ "$pmus" -ne "$cycles_events" ] + then + echo "hybrid test [Found $pmus PMUs but $cycles_events cycles events. Failed]" + err=1 + return + fi + echo "hybrid test [Success]" +} + test_default_stat test_stat_record_report test_stat_record_script @@ -153,4 +203,5 @@ test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups test_cputype +test_hybrid exit $err diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index 55ef9c9ded2d..c6d61a4ac3e7 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -1,9 +1,7 @@ -#!/bin/sh +#!/bin/bash # perf all metricgroups test # SPDX-License-Identifier: GPL-2.0 -set -e - ParanoidAndNotRoot() { [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] @@ -14,11 +12,37 @@ if ParanoidAndNotRoot 0 then system_wide_flag="" fi - +err=0 for m in $(perf list --raw-dump metricgroups) do echo "Testing $m" - perf stat -M "$m" $system_wide_flag sleep 0.01 + result=$(perf stat -M "$m" $system_wide_flag sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] + then + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + elif [[ "$result" =~ "in per-thread mode, enable system wide" ]] + then + echo "Permissions - need system wide mode" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + else + echo "Metric group $m failed" + echo $result + err=1 # Fail + fi + fi done -exit 0 +exit $err diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh index 54774525e18a..73e9347e88a9 100755 --- a/tools/perf/tests/shell/stat_all_metrics.sh +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -2,42 +2,87 @@ # perf all metrics test # SPDX-License-Identifier: GPL-2.0 +ParanoidAndNotRoot() +{ + [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] +} + +system_wide_flag="-a" +if ParanoidAndNotRoot 0 +then + system_wide_flag="" +fi + err=0 for m in $(perf list --raw-dump metrics); do echo "Testing $m" - result=$(perf stat -M "$m" true 2>&1) - if [[ "$result" =~ ${m:0:50} ]] || [[ "$result" =~ "<not supported>" ]] + result=$(perf stat -M "$m" $system_wide_flag -- sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] then - continue + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "in per-thread mode, enable system wide" ]] + then + echo "Permissions - need system wide mode" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "<not supported>" ]] + then + echo "Not supported events" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] + then + echo "FP issues" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "PMM" ]] + then + echo "Optane memory issues" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + fi fi - # Failed so try system wide. - result=$(perf stat -M "$m" -a sleep 0.01 2>&1) + if [[ "$result" =~ ${m:0:50} ]] then continue fi - # Failed again, possibly the workload was too small so retry with something - # longer. - result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) + + # Failed, possibly the workload was too small so retry with something longer. + result=$(perf stat -M "$m" $system_wide_flag -- perf bench internals synthesize 2>&1) if [[ "$result" =~ ${m:0:50} ]] then continue fi echo "Metric '$m' not printed in:" echo "$result" - if [[ "$err" != "1" ]] - then - err=2 - if [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] - then - echo "Skip, not fail, for FP issues" - elif [[ "$result" =~ "PMM" ]] - then - echo "Skip, not fail, for Optane memory issues" - else - err=1 - fi - fi + err=1 done exit "$err" diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index d2a3506e0d19..8b148b300be1 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -1,23 +1,51 @@ -#!/bin/sh -# perf all PMU test +#!/bin/bash +# perf all PMU test (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e +err=0 +result="" + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + echo "$result" + exit 1 +} +trap trap_cleanup EXIT TERM INT # Test all PMU events; however exclude parameterized ones (name contains '?') -for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do +for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g') +do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) - if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then - # We failed to see the event and it is supported. Possibly the workload was - # too small so retry with something longer. - result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) - if ! echo "$result" | grep -q "$p" ; then - echo "Event '$p' not printed in:" - echo "$result" - exit 1 - fi + if echo "$result" | grep -q "$p" + then + # Event seen in output. + continue + fi + if echo "$result" | grep -q "<not supported>" + then + # Event not supported, so ignore. + continue + fi + if echo "$result" | grep -q "Access to performance monitoring and observability operations is limited." + then + # Access is limited, so ignore. + continue + fi + + # We failed to see the event and it is supported. Possibly the workload was + # too small so retry with something longer. + result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) + if echo "$result" | grep -q "$p" + then + # Event seen in output. + continue fi + echo "Error: event '$p' not printed in:" + echo "$result" + err=1 done -exit 0 +trap - EXIT TERM INT +exit $err diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh index f250b7d6f773..95d2ad5d17c6 100755 --- a/tools/perf/tests/shell/stat_bpf_counters.sh +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -1,10 +1,10 @@ #!/bin/sh -# perf stat --bpf-counters test +# perf stat --bpf-counters test (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e -workload="perf test -w brstack" +workload="perf test -w sqrtloop" # check whether $2 is within +/- 20% of $1 compare_number() diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh index e75d0780dc78..2ec69060c42f 100755 --- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh +++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh @@ -58,22 +58,9 @@ check_system_wide_counted() fi } -check_cpu_list_counted() -{ - check_cpu_list_counted_output=$(perf stat -C 0,1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1 2>&1) - if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then - echo "Some CPU events are not counted" - if [ "${verbose}" = "1" ]; then - echo ${check_cpu_list_counted_output} - fi - exit 1 - fi -} - check_bpf_counter find_cgroups check_system_wide_counted -check_cpu_list_counted exit 0 diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 3302ea0b9672..573af9235b72 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check Arm CoreSight trace data recording and synthesized samples +# Check Arm CoreSight trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data with Arm CoreSight sinks; # then verify if there have any branch samples and instruction samples @@ -12,7 +12,7 @@ glb_err=0 skip_if_no_cs_etm_event() { - perf list | grep -q 'cs_etm//' && return 0 + perf list pmu | grep -q 'cs_etm//' && return 0 # cs_etm event doesn't exist return 2 diff --git a/tools/perf/tests/shell/test_arm_coresight_disasm.sh b/tools/perf/tests/shell/test_arm_coresight_disasm.sh new file mode 100755 index 000000000000..be2d26303f94 --- /dev/null +++ b/tools/perf/tests/shell/test_arm_coresight_disasm.sh @@ -0,0 +1,65 @@ +#!/bin/sh +# Check Arm CoreSight disassembly script completes without errors (exclusive) +# SPDX-License-Identifier: GPL-2.0 + +# The disassembly script reconstructs ranges of instructions and gives these to objdump to +# decode. objdump doesn't like ranges that go backwards, but these are a good indication +# that decoding has gone wrong either in OpenCSD, Perf or in the range reconstruction in +# the script. Test all 3 parts are working correctly by running the script. + +skip_if_no_cs_etm_event() { + perf list pmu | grep -q 'cs_etm//' && return 0 + + # cs_etm event doesn't exist + return 2 +} + +skip_if_no_cs_etm_event || exit 2 + +# Assume an error unless we reach the very end +set -e +glb_err=1 + +perfdata_dir=$(mktemp -d /tmp/__perf_test.perf.data.XXXXX) +perfdata=${perfdata_dir}/perf.data +file=$(mktemp /tmp/temporary_file.XXXXX) +# Relative path works whether it's installed or running from repo +script_path=$(dirname "$0")/../../scripts/python/arm-cs-trace-disasm.py + +cleanup_files() +{ + set +e + rm -rf ${perfdata_dir} + rm -f ${file} + trap - EXIT TERM INT + exit $glb_err +} + +trap cleanup_files EXIT TERM INT + +# Ranges start and end on branches, so check for some likely branch instructions +sep="\s\|\s" +branch_search="\sbl${sep}b${sep}b.ne${sep}b.eq${sep}cbz\s" + +## Test kernel ## +if [ -e /proc/kcore ]; then + echo "Testing kernel disassembly" + perf record -o ${perfdata} -e cs_etm//k --kcore -- touch $file > /dev/null 2>&1 + perf script -i ${perfdata} -s python:${script_path} -- \ + -d --stop-sample=30 2> /dev/null > ${file} + grep -q -e ${branch_search} ${file} + echo "Found kernel branches" +else + # kcore is required for correct kernel decode due to runtime code patching + echo "No kcore, skipping kernel test" +fi + +## Test user ## +echo "Testing userspace disassembly" +perf record -o ${perfdata} -e cs_etm//u -- touch $file > /dev/null 2>&1 +perf script -i ${perfdata} -s python:${script_path} -- \ + -d --stop-sample=30 2> /dev/null > ${file} +grep -q -e ${branch_search} ${file} +echo "Found userspace branches" + +glb_err=0 diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh index 03d5c7d12ee5..3258368634f7 100755 --- a/tools/perf/tests/shell/test_arm_spe.sh +++ b/tools/perf/tests/shell/test_arm_spe.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check Arm SPE trace data recording and synthesized samples +# Check Arm SPE trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data of Arm SPE events; # then verify if any SPE event samples are generated by SPE with @@ -9,7 +9,7 @@ # German Gomez <german.gomez@arm.com>, 2021 skip_if_no_arm_spe_event() { - perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0 + perf list pmu | grep -E -q 'arm_spe_[0-9]+//' && return 0 # arm_spe event doesn't exist return 2 diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index 1a7e6a82d0e3..8efeef9fb956 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -5,7 +5,7 @@ # German Gomez <german.gomez@arm.com>, 2022 skip_if_no_arm_spe_event() { - perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0 + perf list pmu | grep -E -q 'arm_spe_[0-9]+//' && return 0 return 2 } diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index 3dfa91832aa8..c86da0235059 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Test data symbol +# Test data symbol (exclusive) # SPDX-License-Identifier: GPL-2.0 # Leo Yan <leo.yan@linaro.org>, 2022 diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh index 723ec501f99a..e6f0070975f6 100755 --- a/tools/perf/tests/shell/test_intel_pt.sh +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -1,11 +1,11 @@ #!/bin/sh -# Miscellaneous Intel PT testing +# Miscellaneous Intel PT testing (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e # Skip if no Intel PT -perf list | grep -q 'intel_pt//' || exit 2 +perf list pmu | grep -q 'intel_pt//' || exit 2 shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh diff --git a/tools/perf/tests/shell/test_stat_intel_tpebs.sh b/tools/perf/tests/shell/test_stat_intel_tpebs.sh index c60b29add980..f95fc64bf0a7 100755 --- a/tools/perf/tests/shell/test_stat_intel_tpebs.sh +++ b/tools/perf/tests/shell/test_stat_intel_tpebs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# test Intel TPEBS counting mode +# test Intel TPEBS counting mode (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e @@ -8,12 +8,15 @@ grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } # Use this event for testing because it should exist in all platforms event=cache-misses:R +# Hybrid platforms output like "cpu_atom/cache-misses/R", rather than as above +alt_name=/cache-misses/R + # Without this cmd option, default value or zero is returned -echo "Testing without --record-tpebs" -result=$(perf stat -e "$event" true 2>&1) -[[ "$result" =~ $event ]] || exit 1 +#echo "Testing without --record-tpebs" +#result=$(perf stat -e "$event" true 2>&1) +#[[ "$result" =~ $event || "$result" =~ $alt_name ]] || exit 1 # In platforms that do not support TPEBS, it should execute without error. echo "Testing with --record-tpebs" result=$(perf stat -e "$event" --record-tpebs -a sleep 0.01 2>&1) -[[ "$result" =~ "perf record" && "$result" =~ $event ]] || exit 1 +[[ "$result" =~ "perf record" && "$result" =~ $event || "$result" =~ $alt_name ]] || exit 1 diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 3146a1eece07..708a13f00635 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check open filename arg using perf trace + vfs_getname +# Check open filename arg using perf trace + vfs_getname (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system # then use it with 'perf trace' using 'touch' to write to a temp file, then @@ -19,7 +19,7 @@ skip_if_no_perf_trace || exit 2 . "$(dirname $0)"/lib/probe_vfs_getname.sh trace_open_vfs_getname() { - evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" + evts="$(echo "$(perf list tracepoint 2>/dev/null | grep -E 'syscalls:sys_enter_open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" perf trace -e $evts touch $file 2>&1 | \ grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +\"?${file}\"?, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/tests/shell/trace_exit_race.sh b/tools/perf/tests/shell/trace_exit_race.sh new file mode 100755 index 000000000000..fbb0adc33a88 --- /dev/null +++ b/tools/perf/tests/shell/trace_exit_race.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# perf trace exit race +# SPDX-License-Identifier: GPL-2.0 + +# Check that the last events of a perf trace'd subprocess are not +# lost. Specifically, trace the exiting syscall of "true" 10 times and ensure +# the output contains 10 correct lines. + +# shellcheck source=lib/probe.sh +. "$(dirname $0)"/lib/probe.sh + +skip_if_no_perf_trace || exit 2 + +if [ "$1" = "-v" ]; then + verbose="1" +fi + +iter=10 +regexp=" +[0-9]+\.[0-9]+ [0-9]+ syscalls:sys_enter_exit_group\(\)$" + +trace_shutdown_race() { + for _ in $(seq $iter); do + perf trace --no-comm -e syscalls:sys_enter_exit_group true 2>>$file + done + result="$(grep -c -E "$regexp" $file)" + [ $result = $iter ] +} + + +file=$(mktemp /tmp/temporary_file.XXXXX) + +# Do not use whatever ~/.perfconfig file, it may change the output +# via trace.{show_timestamp,show_prefix,etc} +export PERF_CONFIG=/dev/null + +trace_shutdown_race +err=$? + +if [ $err != 0 ] && [ "${verbose}" = "1" ]; then + lines_not_matching=$(mktemp /tmp/temporary_file.XXXXX) + if grep -v -E "$regexp" $file > $lines_not_matching ; then + echo "Lines not matching the expected regexp: '$regexp':" + cat $lines_not_matching + else + echo "Missing output, expected $iter but only got $result" + fi + rm -f $lines_not_matching +fi + +rm -f ${file} +exit $err diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index d33d0952025c..8e328bbd509d 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -152,4 +152,11 @@ out_delete_evlist: return err; } -DEFINE_SUITE("Number of exit events of a simple workload", task_exit); +struct test_case tests__task_exit[] = { + TEST_CASE_EXCLUSIVE("Number of exit events of a simple workload", task_exit), + { .name = NULL, } +}; +struct test_suite suite__task_exit = { + .desc = "Number of exit events of a simple workload", + .test_cases = tests__task_exit, +}; diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index ed114b044293..cf3ae0c1d871 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -175,6 +175,7 @@ static void append_script(int dir_fd, const char *name, char *desc, struct test_suite *test_suite, **result_tmp; struct test_case *tests; size_t len; + char *exclusive; snprintf(link, sizeof(link), "/proc/%d/fd/%d", getpid(), dir_fd); len = readlink(link, filename, sizeof(filename)); @@ -191,9 +192,13 @@ static void append_script(int dir_fd, const char *name, char *desc, return; } tests[0].name = strdup_check(name); + exclusive = strstr(desc, " (exclusive)"); + if (exclusive != NULL) { + tests[0].exclusive = true; + exclusive[0] = '\0'; + } tests[0].desc = strdup_check(desc); tests[0].run_case = shell_test__run; - test_suite = zalloc(sizeof(*test_suite)); if (!test_suite) { pr_err("Out of memory while building script test suite list\n"); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 6ea2be86b7bf..cb58b43aa063 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -36,6 +36,7 @@ struct test_case { const char *desc; const char *skip_reason; test_fnptr run_case; + bool exclusive; }; struct test_suite { @@ -62,6 +63,14 @@ struct test_suite { .skip_reason = _reason, \ } +#define TEST_CASE_EXCLUSIVE(description, _name) \ + { \ + .name = #_name, \ + .desc = description, \ + .run_case = test__##_name, \ + .exclusive = true, \ + } + #define DEFINE_SUITE(description, _name) \ struct test_case tests__##_name[] = { \ TEST_CASE(description, _name), \ @@ -83,6 +92,8 @@ DECLARE_SUITE(perf_evsel__tp_sched_test); DECLARE_SUITE(syscall_openat_tp_fields); DECLARE_SUITE(pmu); DECLARE_SUITE(pmu_events); +DECLARE_SUITE(hwmon_pmu); +DECLARE_SUITE(tool_pmu); DECLARE_SUITE(attr); DECLARE_SUITE(dso_data); DECLARE_SUITE(dso_data_cache); diff --git a/tools/perf/tests/tool_pmu.c b/tools/perf/tests/tool_pmu.c new file mode 100644 index 000000000000..187942b749b7 --- /dev/null +++ b/tools/perf/tests/tool_pmu.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "debug.h" +#include "evlist.h" +#include "parse-events.h" +#include "tests.h" +#include "tool_pmu.h" + +static int do_test(enum tool_pmu_event ev, bool with_pmu) +{ + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + struct parse_events_error err; + int ret; + char str[128]; + bool found = false; + + if (!evlist) { + pr_err("evlist allocation failed\n"); + return TEST_FAIL; + } + + if (with_pmu) + snprintf(str, sizeof(str), "tool/%s/", tool_pmu__event_to_str(ev)); + else + snprintf(str, sizeof(str), "%s", tool_pmu__event_to_str(ev)); + + parse_events_error__init(&err); + ret = parse_events(evlist, str, &err); + if (ret) { + if (tool_pmu__skip_event(tool_pmu__event_to_str(ev))) { + ret = TEST_OK; + goto out; + } + + pr_debug("FAILED %s:%d failed to parse event '%s', err %d\n", + __FILE__, __LINE__, str, ret); + parse_events_error__print(&err, str); + ret = TEST_FAIL; + goto out; + } + + ret = TEST_OK; + if (with_pmu ? (evlist->core.nr_entries != 1) : (evlist->core.nr_entries < 1)) { + pr_debug("FAILED %s:%d Unexpected number of events for '%s' of %d\n", + __FILE__, __LINE__, str, evlist->core.nr_entries); + ret = TEST_FAIL; + goto out; + } + + evlist__for_each_entry(evlist, evsel) { + if (perf_pmu__is_tool(evsel->pmu)) { + if (evsel->core.attr.config != ev) { + pr_debug("FAILED %s:%d Unexpected config for '%s', %lld != %d\n", + __FILE__, __LINE__, str, evsel->core.attr.config, ev); + ret = TEST_FAIL; + goto out; + } + found = true; + } + } + + if (!found && !tool_pmu__skip_event(tool_pmu__event_to_str(ev))) { + pr_debug("FAILED %s:%d Didn't find tool event '%s' in parsed evsels\n", + __FILE__, __LINE__, str); + ret = TEST_FAIL; + } + +out: + parse_events_error__exit(&err); + evlist__delete(evlist); + return ret; +} + +static int test__tool_pmu_without_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + int i; + + tool_pmu__for_each_event(i) { + int ret = do_test(i, /*with_pmu=*/false); + + if (ret != TEST_OK) + return ret; + } + return TEST_OK; +} + +static int test__tool_pmu_with_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + int i; + + tool_pmu__for_each_event(i) { + int ret = do_test(i, /*with_pmu=*/true); + + if (ret != TEST_OK) + return ret; + } + return TEST_OK; +} + +static struct test_case tests__tool_pmu[] = { + TEST_CASE("Parsing without PMU name", tool_pmu_without_pmu), + TEST_CASE("Parsing with PMU name", tool_pmu_with_pmu), + { .name = NULL, } +}; + +struct test_suite suite__tool_pmu = { + .desc = "Tool PMU", + .test_cases = tests__tool_pmu, +}; diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh index e3f13f96a27c..fac4d0c049fc 100755 --- a/tools/perf/trace/beauty/fs_at_flags.sh +++ b/tools/perf/trace/beauty/fs_at_flags.sh @@ -13,13 +13,14 @@ printf "static const char *fs_at_flags[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' # AT_EACCESS is only meaningful to faccessat, so we will special case it there... # AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC -# AT_HANDLE_FID and AT_HANDLE_MNT_ID_UNIQUE are reusing values and are valid only for name_to_handle_at() +# AT_HANDLE_FID, AT_HANDLE_MNT_ID_UNIQUE and AT_HANDLE_CONNECTABLE are reusing values and are valid only for name_to_handle_at() # AT_RENAME_NOREPLACE reuses 0x1 and is valid only for renameat2() grep -E $regex ${linux_fcntl} | \ grep -v AT_EACCESS | \ grep -v AT_STATX_SYNC_TYPE | \ grep -v AT_HANDLE_FID | \ grep -v AT_HANDLE_MNT_ID_UNIQUE | \ + grep -v AT_HANDLE_CONNECTABLE | \ grep -v AT_RENAME_NOREPLACE | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index 87e2dec79fea..6e6907e63bfc 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -153,9 +153,6 @@ object identity and may not be usable with open_by_handle_at(2). */ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ - -#if defined(__KERNEL__) -#define AT_GETATTR_NOSEC 0x80000000 -#endif +#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index 225bc366ffcb..c07008816aca 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 mnt_opts; /* [str] Mount options of the mount */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -173,7 +173,13 @@ struct statmount { __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ __u64 mnt_ns_id; /* ID of the mount namespace */ - __u64 __spare2[49]; + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 __spare2[46]; char str[]; /* Variable size part containing strings */ }; @@ -207,6 +213,10 @@ struct mnt_id_req { #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ #define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ /* * Special @mnt_id values that can be passed to listmount diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 35791791a879..5c6080680cb2 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -328,4 +331,26 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/util/Build b/tools/perf/util/Build index dc616292b2dd..c06d2ee9024c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -83,6 +83,8 @@ perf-util-y += pmu.o perf-util-y += pmus.o perf-util-y += pmu-flex.o perf-util-y += pmu-bison.o +perf-util-y += hwmon_pmu.o +perf-util-y += tool_pmu.o perf-util-y += svghelper.o perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o perf-util-y += trace-event-scripting.o @@ -199,11 +201,14 @@ ifndef CONFIG_SETNS perf-util-y += setns.o endif -perf-util-$(CONFIG_DWARF) += probe-finder.o -perf-util-$(CONFIG_DWARF) += dwarf-aux.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_DWARF) += debuginfo.o -perf-util-$(CONFIG_DWARF) += annotate-data.o +perf-util-$(CONFIG_LIBDW) += probe-finder.o +perf-util-$(CONFIG_LIBDW) += dwarf-aux.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-csky.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-powerpc.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-x86.o +perf-util-$(CONFIG_LIBDW) += debuginfo.o +perf-util-$(CONFIG_LIBDW) += annotate-data.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o @@ -234,7 +239,7 @@ perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o ifdef CONFIG_JITDUMP perf-util-$(CONFIG_LIBELF) += jitdump.o perf-util-$(CONFIG_LIBELF) += genelf.o -perf-util-$(CONFIG_DWARF) += genelf_debug.o +perf-util-$(CONFIG_LIBDW) += genelf_debug.o endif perf-util-y += perf-hooks.o diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 8ac0fd94a0ba..98c80b2268dd 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -9,7 +9,7 @@ #include "dwarf-regs.h" #include "annotate.h" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "debuginfo.h" #endif @@ -165,7 +165,7 @@ struct annotated_data_stat { }; extern struct annotated_data_stat ann_data_stat; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* * Type information in a register, valid when @ok is true. * The @caller_saved registers are invalidated after a function call. @@ -244,7 +244,7 @@ bool get_global_var_info(struct data_loc_info *dloc, u64 addr, const char **var_name, int *var_offset); void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline struct annotated_data_type * find_data_type(struct data_loc_info *dloc __maybe_unused) @@ -276,7 +276,7 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un return -1; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_SLANG_SUPPORT int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 37ce43c4eb8f..32e15c9f53f3 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2116,6 +2116,12 @@ static int annotation__config(const char *var, const char *value, void *data) opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.disassemblers")) { + opt->disassemblers_str = strdup(value); + if (!opt->disassemblers_str) { + pr_err("Not enough memory for annotate.disassemblers\n"); + return -1; + } } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2292,7 +2298,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg1 = get_dwarf_regnum(regname, 0); + op_loc->reg1 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); /* Get the second register */ @@ -2305,7 +2311,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg2 = get_dwarf_regnum(regname, 0); + op_loc->reg2 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); } return 0; @@ -2405,7 +2411,7 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, return -1; if (*s == arch->objdump.register_char) - op_loc->reg1 = get_dwarf_regnum(s, 0); + op_loc->reg1 = get_dwarf_regnum(s, arch->e_machine, arch->e_flags); else if (*s == arch->objdump.imm_char) { op_loc->offset = strtol(s + 1, &p, 0); if (p && p != s + 1) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 8b9e05a1932f..194a05cbc506 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,6 +34,9 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 +// llvm, capstone, objdump +#define MAX_DISASSEMBLERS 3 + struct annotation_options { bool hide_src_code, use_offset, @@ -49,11 +52,14 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; + u8 nr_disassemblers; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; + const char *disassemblers_str; + const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..358c611eeddb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -56,15 +56,15 @@ enum arm_spe_op_type { ARM_SPE_OP_BR_INDIRECT = 1 << 17, }; -enum arm_spe_neoverse_data_source { - ARM_SPE_NV_L1D = 0x0, - ARM_SPE_NV_L2 = 0x8, - ARM_SPE_NV_PEER_CORE = 0x9, - ARM_SPE_NV_LOCAL_CLUSTER = 0xa, - ARM_SPE_NV_SYS_CACHE = 0xb, - ARM_SPE_NV_PEER_CLUSTER = 0xc, - ARM_SPE_NV_REMOTE = 0xd, - ARM_SPE_NV_DRAM = 0xe, +enum arm_spe_common_data_source { + ARM_SPE_COMMON_DS_L1D = 0x0, + ARM_SPE_COMMON_DS_L2 = 0x8, + ARM_SPE_COMMON_DS_PEER_CORE = 0x9, + ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa, + ARM_SPE_COMMON_DS_SYS_CACHE = 0xb, + ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc, + ARM_SPE_COMMON_DS_REMOTE = 0xd, + ARM_SPE_COMMON_DS_DRAM = 0xe, }; struct arm_spe_record { diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 138ffc71b32d..dbf13f47879c 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -46,7 +46,6 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; - u64 midr; struct perf_tsc_conversion tc; @@ -69,7 +68,7 @@ struct arm_spe { u64 llc_access_id; u64 tlb_miss_id; u64 tlb_access_id; - u64 branch_miss_id; + u64 branch_id; u64 remote_access_id; u64 memory_id; u64 instructions_id; @@ -78,6 +77,11 @@ struct arm_spe { unsigned long num_events; u8 use_ctx_pkt_for_pid; + + u64 **metadata; + u64 metadata_ver; + u64 metadata_nr_cpu; + bool is_homogeneous; }; struct arm_spe_queue { @@ -96,6 +100,7 @@ struct arm_spe_queue { u64 timestamp; struct thread *thread; u64 period_instructions; + u32 flags; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -118,7 +123,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -273,6 +278,20 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) return 0; } +static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu) +{ + u64 i; + + if (!spe->metadata) + return NULL; + + for (i = 0; i < spe->metadata_nr_cpu; i++) + if (spe->metadata[i][ARM_SPE_CPU] == cpu) + return spe->metadata[i]; + + return NULL; +} + static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) { struct simd_flags simd_flags = {}; @@ -376,6 +395,7 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.stream_id = spe_events_id; sample.addr = record->to_ip; sample.weight = record->latency; + sample.flags = speq->flags; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -400,24 +420,44 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; - sample.addr = record->virt_addr; + sample.addr = record->to_ip; sample.phys_addr = record->phys_addr; sample.data_src = data_src; sample.period = spe->instructions_sample_period; sample.weight = record->latency; + sample.flags = speq->flags; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -static const struct midr_range neoverse_spe[] = { +static const struct midr_range common_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), {}, }; -static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__sample_flags(struct arm_spe_queue *speq) +{ + const struct arm_spe_record *record = &speq->decoder->record; + + speq->flags = 0; + if (record->op & ARM_SPE_OP_BRANCH_ERET) { + speq->flags = PERF_IP_FLAG_BRANCH; + + if (record->type & ARM_SPE_BRANCH_MISS) + speq->flags |= PERF_IP_FLAG_BRANCH_MISS; + } +} + +static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { /* * Even though four levels of cache hierarchy are possible, no known @@ -439,17 +479,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } switch (record->source) { - case ARM_SPE_NV_L1D: + case ARM_SPE_COMMON_DS_L1D: data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_L2: + case ARM_SPE_COMMON_DS_L2: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_PEER_CORE: + case ARM_SPE_COMMON_DS_PEER_CORE: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -458,8 +498,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know if this is L1, L2 but we do know it was a cache-2-cache * transfer, so set SNOOPX_PEER */ - case ARM_SPE_NV_LOCAL_CLUSTER: - case ARM_SPE_NV_PEER_CLUSTER: + case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: + case ARM_SPE_COMMON_DS_PEER_CLUSTER: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -467,7 +507,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec /* * System cache is assumed to be L3 */ - case ARM_SPE_NV_SYS_CACHE: + case ARM_SPE_COMMON_DS_SYS_CACHE: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoop = PERF_MEM_SNOOP_HIT; @@ -476,13 +516,13 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know what level it hit in, except it came from the other * socket */ - case ARM_SPE_NV_REMOTE: + case ARM_SPE_COMMON_DS_REMOTE: data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; break; - case ARM_SPE_NV_DRAM: + case ARM_SPE_COMMON_DS_DRAM: data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; @@ -492,8 +532,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } } -static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__synth_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { data_src->mem_lvl = PERF_MEM_LVL_L3; @@ -515,10 +555,55 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq) +{ + struct arm_spe *spe = speq->spe; + bool is_in_cpu_list; + u64 *metadata = NULL; + u64 midr = 0; + + /* Metadata version 1 assumes all CPUs are the same (old behavior) */ + if (spe->metadata_ver == 1) { + const char *cpuid; + + pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); + cpuid = perf_env__cpuid(spe->session->evlist->env); + midr = strtol(cpuid, NULL, 16); + } else { + /* CPU ID is -1 for per-thread mode */ + if (speq->cpu < 0) { + /* + * On the heterogeneous system, due to CPU ID is -1, + * cannot confirm the data source packet is supported. + */ + if (!spe->is_homogeneous) + return false; + + /* In homogeneous system, simply use CPU0's metadata */ + if (spe->metadata) + metadata = spe->metadata[0]; + } else { + metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); + } + + if (!metadata) + return false; + + midr = metadata[ARM_SPE_CPU_MIDR]; + } + + is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus); + if (is_in_cpu_list) + return true; + else + return false; +} + +static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, + const struct arm_spe_record *record) { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; - bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + bool is_common = arm_spe__is_common_ds_encoding(speq); if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; @@ -527,10 +612,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m else return 0; - if (is_neoverse) - arm_spe__synth_data_source_neoverse(record, &data_src); + if (is_common) + arm_spe__synth_data_source_common(record, &data_src); else - arm_spe__synth_data_source_generic(record, &data_src); + arm_spe__synth_memory_level(record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -551,7 +636,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq) u64 data_src; int err; - data_src = arm_spe__synth_data_source(record, spe->midr); + arm_spe__sample_flags(speq); + data_src = arm_spe__synth_data_source(speq, record); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -601,8 +687,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq) } } - if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { - err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); + if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { + err = arm_spe__synth_branch_sample(speq, spe->branch_id); if (err) return err; } @@ -1016,6 +1102,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return 0; } +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) +{ + u64 *metadata; + + metadata = zalloc(per_cpu_size); + if (!metadata) + return NULL; + + memcpy(metadata, buf, per_cpu_size); + return metadata; +} + +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) +{ + int i; + + for (i = 0; i < nr_cpu; i++) + zfree(&metadata[i]); + free(metadata); +} + +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, + u64 *ver, int *nr_cpu) +{ + u64 *ptr = (u64 *)info->priv; + u64 metadata_size; + u64 **metadata = NULL; + int hdr_sz, per_cpu_sz, i; + + metadata_size = info->header.size - + sizeof(struct perf_record_auxtrace_info); + + /* Metadata version 1 */ + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { + *ver = 1; + *nr_cpu = 0; + /* No per CPU metadata */ + return NULL; + } + + *ver = ptr[ARM_SPE_HEADER_VERSION]; + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; + + metadata = calloc(*nr_cpu, sizeof(*metadata)); + if (!metadata) + return NULL; + + /* Locate the start address of per CPU metadata */ + ptr += hdr_sz; + per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); + + for (i = 0; i < *nr_cpu; i++) { + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); + if (!metadata[i]) + goto err_per_cpu_metadata; + + ptr += per_cpu_sz / sizeof(u64); + } + + return metadata; + +err_per_cpu_metadata: + arm_spe__free_metadata(metadata, *nr_cpu); + return NULL; +} + static void arm_spe_free_queue(void *priv) { struct arm_spe_queue *speq = priv; @@ -1050,6 +1203,7 @@ static void arm_spe_free(struct perf_session *session) auxtrace_heap__free(&spe->heap); arm_spe_free_events(session); session->auxtrace = NULL; + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); free(spe); } @@ -1061,16 +1215,60 @@ static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == spe->pmu_type; } -static const char * const arm_spe_info_fmts[] = { - [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +static const char * const metadata_hdr_v1_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", +}; + +static const char * const metadata_hdr_fmts[] = { + [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", + [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", + [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", + [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", +}; + +static const char * const metadata_per_cpu_fmts[] = { + [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", + [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", + [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", + [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", + [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", }; -static void arm_spe_print_info(__u64 *arr) +static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) { + unsigned int i, cpu, hdr_size, cpu_num, cpu_size; + const char * const *hdr_fmts; + if (!dump_trace) return; - fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); + if (spe->metadata_ver == 1) { + cpu_num = 0; + hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; + hdr_fmts = metadata_hdr_v1_fmts; + } else { + cpu_num = arr[ARM_SPE_CPUS_NUM]; + hdr_size = arr[ARM_SPE_HEADER_SIZE]; + hdr_fmts = metadata_hdr_fmts; + } + + for (i = 0; i < hdr_size; i++) + fprintf(stdout, hdr_fmts[i], arr[i]); + + arr += hdr_size; + for (cpu = 0; cpu < cpu_num; cpu++) { + /* + * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS + * are fixed. The sequential parameter size is decided by the + * field 'ARM_SPE_CPU_NR_PARAMS'. + */ + cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; + for (i = 0; i < cpu_size; i++) + fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); + arr += cpu_size; + } } static void arm_spe_set_event_name(struct evlist *evlist, u64 id, @@ -1202,12 +1400,12 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) if (spe->synth_opts.branches) { spe->sample_branch = true; - /* Branch miss */ + /* Branch */ err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; - spe->branch_miss_id = id; - arm_spe_set_event_name(evlist, id, "branch-miss"); + spe->branch_id = id; + arm_spe_set_event_name(evlist, id, "branch"); id += 1; } @@ -1258,24 +1456,57 @@ synth_instructions_out: return 0; } +static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) +{ + u64 midr; + int i; + + if (!nr_cpu) + return false; + + for (i = 0; i < nr_cpu; i++) { + if (!metadata[i]) + return false; + + if (i == 0) { + midr = metadata[i][ARM_SPE_CPU_MIDR]; + continue; + } + + if (midr != metadata[i][ARM_SPE_CPU_MIDR]) + return false; + } + + return true; +} + int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; - size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; + size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; struct perf_record_time_conv *tc = &session->time_conv; - const char *cpuid = perf_env__cpuid(session->evlist->env); - u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; - int err; + u64 **metadata = NULL; + u64 metadata_ver; + int nr_cpu, err; if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, + &nr_cpu); + if (!metadata && metadata_ver != 1) { + pr_err("Failed to parse Arm SPE metadata.\n"); + return -EINVAL; + } + spe = zalloc(sizeof(struct arm_spe)); - if (!spe) - return -ENOMEM; + if (!spe) { + err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&spe->queues); if (err) @@ -1284,8 +1515,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->session = session; spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; - spe->midr = midr; + if (metadata_ver == 1) + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + else + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; + spe->metadata = metadata; + spe->metadata_ver = metadata_ver; + spe->metadata_nr_cpu = nr_cpu; + spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); @@ -1318,7 +1555,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; - arm_spe_print_info(&auxtrace_info->priv[0]); + arm_spe_print_info(spe, &auxtrace_info->priv[0]); if (dump_trace) return 0; @@ -1346,5 +1583,7 @@ err_free_queues: session->auxtrace = NULL; err_free: free(spe); +err_free_metadata: + arm_spe__free_metadata(metadata, nr_cpu); return err; } diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h index 4f4900c18f3e..390679a4af2f 100644 --- a/tools/perf/util/arm-spe.h +++ b/tools/perf/util/arm-spe.h @@ -12,10 +12,46 @@ enum { ARM_SPE_PMU_TYPE, ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_V1_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE \ + (ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64)) + +enum { + /* + * The old metadata format (defined above) does not include a + * field for version number. Version 1 is reserved and starts + * from version 2. + */ + ARM_SPE_HEADER_VERSION, + /* Number of sizeof(u64) */ + ARM_SPE_HEADER_SIZE, + /* PMU type shared by CPUs */ + ARM_SPE_PMU_TYPE_V2, + /* Number of CPUs */ + ARM_SPE_CPUS_NUM, ARM_SPE_AUXTRACE_PRIV_MAX, }; -#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) +enum { + /* Magic number */ + ARM_SPE_MAGIC, + /* CPU logical number in system */ + ARM_SPE_CPU, + /* Number of parameters */ + ARM_SPE_CPU_NR_PARAMS, + /* CPU MIDR */ + ARM_SPE_CPU_MIDR, + /* Associated PMU type */ + ARM_SPE_CPU_PMU_TYPE, + /* Minimal interval */ + ARM_SPE_CAP_MIN_IVAL, + ARM_SPE_CPU_PRIV_MAX, +}; + +#define ARM_SPE_HEADER_CURRENT_VERSION 2 + union perf_event; struct perf_session; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index a1895a4f530b..dddaf4f3ffed 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -75,7 +75,6 @@ enum itrace_period_type { * (not fully accurate, since CYC packets are only emitted * together with other events, such as branches) * @branches: whether to synthesize 'branches' events - * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events @@ -650,7 +649,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ " y[period]: synthesize cycles events (same period as i)\n" \ -" b: synthesize branches events (branch misses for Arm SPE)\n" \ +" b: synthesize branches events\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index e87b6789eb9e..a4fdf6911ec1 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -375,7 +375,7 @@ static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *en pfi = zalloc(sizeof(*pfi)); if (pfi == NULL) { pr_err("Cannot save pinned filter index\n"); - goto err; + return -ENOMEM; } pfi->evsel = evsel; diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h deleted file mode 100644 index 66dcf751ef65..000000000000 --- a/tools/perf/util/bpf-prologue.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, He Kuang <hekuang@huawei.com> - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __BPF_PROLOGUE_H -#define __BPF_PROLOGUE_H - -struct probe_trace_arg; -struct bpf_insn; - -#define BPF_PROLOGUE_MAX_ARGS 3 -#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 -#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2 - -#ifdef HAVE_BPF_PROLOGUE -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space); -#else -#include <linux/compiler.h> -#include <errno.h> - -static inline int -bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, - int nargs __maybe_unused, - struct bpf_insn *new_prog __maybe_unused, - size_t *new_cnt, - size_t cnt_space __maybe_unused) -{ - if (!new_cnt) - return -EINVAL; - *new_cnt = 0; - return -ENOTSUP; -} -#endif -#endif /* __BPF_PROLOGUE_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7a8af60e0f51..73fcafbffc6a 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -394,6 +394,7 @@ static int bperf_check_target(struct evsel *evsel, } static struct perf_cpu_map *all_cpu_map; +static __u32 filter_entry_cnt; static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct perf_event_attr_map_entry *entry) @@ -444,12 +445,32 @@ out: return err; } +static int bperf_attach_follower_program(struct bperf_follower_bpf *skel, + enum bperf_filter_type filter_type, + bool inherit) +{ + struct bpf_link *link; + int err = 0; + + if ((filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) && inherit) + /* attach all follower bpf progs to enable event inheritance */ + err = bperf_follower_bpf__attach(skel); + else { + link = bpf_program__attach(skel->progs.fexit_XXX); + if (IS_ERR(link)) + err = PTR_ERR(link); + } + + return err; +} + static int bperf__load(struct evsel *evsel, struct target *target) { struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; int attr_map_fd, diff_map_fd = -1, err; enum bperf_filter_type filter_type; - __u32 filter_entry_cnt, i; + __u32 i; if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) return -1; @@ -529,9 +550,6 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* set up reading map */ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, filter_entry_cnt); - /* set up follower filter based on target */ - bpf_map__set_max_entries(evsel->follower_skel->maps.filter, - filter_entry_cnt); err = bperf_follower_bpf__load(evsel->follower_skel); if (err) { pr_err("Failed to load follower skeleton\n"); @@ -543,6 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) for (i = 0; i < filter_entry_cnt; i++) { int filter_map_fd; __u32 key; + struct bperf_filter_value fval = { i, 0 }; if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) @@ -553,12 +572,14 @@ static int bperf__load(struct evsel *evsel, struct target *target) break; filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); - bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY); } evsel->follower_skel->bss->type = filter_type; + evsel->follower_skel->bss->inherit = target->inherit; - err = bperf_follower_bpf__attach(evsel->follower_skel); + err = bperf_attach_follower_program(evsel->follower_skel, filter_type, + target->inherit); out: if (err && evsel->bperf_leader_link_fd >= 0) @@ -623,7 +644,7 @@ static int bperf__read(struct evsel *evsel) bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); - for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + for (i = 0; i < filter_entry_cnt; i++) { struct perf_cpu entry; __u32 cpu; diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index f193998530d4..0595063139a3 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -5,6 +5,8 @@ #include <bpf/bpf_tracing.h> #include "bperf_u.h" +#define MAX_ENTRIES 102400 + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -22,25 +24,29 @@ struct { struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bperf_filter_value)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); } filter SEC(".maps"); enum bperf_filter_type type = 0; int enabled = 0; +int inherit; SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value *diff_val, *accum_val; __u32 filter_key, zero = 0; - __u32 *accum_key; + __u32 accum_key; + struct bperf_filter_value *fval; if (!enabled) return 0; switch (type) { case BPERF_FILTER_GLOBAL: - accum_key = &zero; + accum_key = zero; goto do_add; case BPERF_FILTER_CPU: filter_key = bpf_get_smp_processor_id(); @@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX) filter_key = bpf_get_current_pid_tgid() & 0xffffffff; break; case BPERF_FILTER_TGID: - filter_key = bpf_get_current_pid_tgid() >> 32; + /* Use pid as the filter_key to exclude new task counts + * when inherit is disabled. Don't worry about the existing + * children in TGID losing their counts, bpf_counter has + * already added them to the filter map via perf_thread_map + * before this bpf prog runs. + */ + filter_key = inherit ? + bpf_get_current_pid_tgid() >> 32 : + bpf_get_current_pid_tgid() & 0xffffffff; break; default: return 0; } - accum_key = bpf_map_lookup_elem(&filter, &filter_key); - if (!accum_key) + fval = bpf_map_lookup_elem(&filter, &filter_key); + if (!fval) return 0; + accum_key = fval->accum_key; + if (fval->exited) + bpf_map_delete_elem(&filter, &filter_key); + do_add: diff_val = bpf_map_lookup_elem(&diff_readings, &zero); if (!diff_val) return 0; - accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key); if (!accum_val) return 0; @@ -75,4 +93,70 @@ do_add: return 0; } +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/task_newtask") +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ + __u32 parent_key, child_key; + struct bperf_filter_value *parent_fval; + struct bperf_filter_value child_fval = { 0 }; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_PID: + parent_key = bpf_get_current_pid_tgid() & 0xffffffff; + child_key = task->pid; + break; + case BPERF_FILTER_TGID: + parent_key = bpf_get_current_pid_tgid() >> 32; + child_key = task->tgid; + if (child_key == parent_key) + return 0; + break; + default: + return 0; + } + + /* Check if the current task is one of the target tasks to be counted */ + parent_fval = bpf_map_lookup_elem(&filter, &parent_key); + if (!parent_fval) + return 0; + + /* Start counting for the new task by adding it into filter map, + * inherit the accum key of its parent task so that they can be + * counted together. + */ + child_fval.accum_key = parent_fval->accum_key; + child_fval.exited = 0; + bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST); + + return 0; +} + +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/sched_process_exit") +int BPF_PROG(on_exittask, struct task_struct *task) +{ + __u32 pid; + struct bperf_filter_value *fval; + + if (!enabled) + return 0; + + /* Stop counting for this task by removing it from filter map. + * For TGID type, if the pid can be found in the map, it means that + * this pid belongs to the leader task. After the task exits, the + * tgid of its child tasks (if any) will be 1, so the pid can be + * safely removed. + */ + pid = task->pid; + fval = bpf_map_lookup_elem(&filter, &pid); + if (fval) + fval->exited = 1; + + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h index 1ce0c2c905c1..4a4a753980be 100644 --- a/tools/perf/util/bpf_skel/bperf_u.h +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -11,4 +11,9 @@ enum bperf_filter_type { BPERF_FILTER_TGID, }; +struct bperf_filter_value { + __u32 accum_key; + __u8 exited; +}; + #endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8982f68e7230..e763e8d99a43 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -277,7 +277,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid struct perf_record_header_build_id b; size_t len; - len = sizeof(b) + name_len + 1; + len = name_len + 1; len = PERF_ALIGN(len, sizeof(u64)); memset(&b, 0, sizeof(b)); @@ -286,7 +286,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid misc |= PERF_RECORD_MISC_BUILD_ID_SIZE; b.pid = pid; b.header.misc = misc; - b.header.size = len; + b.header.size = sizeof(b) + len; err = do_write(fd, &b, sizeof(b)); if (err < 0) diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index bffbdd216a6a..e51f0a676a22 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -93,34 +93,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -/* - * This function splits the buffer by newlines and colors the lines individually. - * - * Returns 0 on success. - */ -int color_fwrite_lines(FILE *fp, const char *color, - size_t count, const char *buf) -{ - if (!*color) - return fwrite(buf, count, 1, fp) != 1; - - while (count) { - char *p = memchr(buf, '\n', count); - - if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || - fputs(PERF_COLOR_RESET, fp) < 0)) - return -1; - if (!p) - return 0; - if (fputc('\n', fp) < 0) - return -1; - count -= p + 1 - buf; - buf = p + 1; - } - return 0; -} - const char *get_percent_color(double percent) { const char *color = PERF_COLOR_NORMAL; diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 01f7bed21c9b..9a7248dbe2d7 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -2,6 +2,7 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include <linux/compiler.h> #include <stdio.h> #include <stdarg.h> @@ -22,6 +23,7 @@ #define MIN_GREEN 0.5 #define MIN_RED 5.0 +#define PERF_COLOR_DELETE_LINE "\033[A\33[2K\r" /* * This variable stores the value of color.ui */ @@ -37,12 +39,11 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); -int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) __printf(3, 4); +int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...) __printf(4, 5); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); -int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); -int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...); +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); +int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 7a650de0db83..68f9407ca74b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -912,6 +912,7 @@ void set_buildid_dir(const char *dir) struct perf_config_scan_data { const char *name; const char *fmt; + const char *value; va_list args; int ret; }; @@ -939,3 +940,24 @@ int perf_config_scan(const char *name, const char *fmt, ...) return d.ret; } + +static int perf_config_get_cb(const char *var, const char *value, void *data) +{ + struct perf_config_scan_data *d = data; + + if (!strcmp(var, d->name)) + d->value = value; + + return 0; +} + +const char *perf_config_get(const char *name) +{ + struct perf_config_scan_data d = { + .name = name, + .value = NULL, + }; + + perf_config(perf_config_get_cb, &d); + return d.value; +} diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2e5e808928a5..9971313d61c1 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -30,6 +30,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3); +const char *perf_config_get(const char *name); int perf_config_set(struct perf_config_set *set, config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index b78ef0262135..b85a8837bddc 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -685,9 +685,14 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, } if (d_params->operation == CS_ETM_OPERATION_DECODE) { + int decode_flags = OCSD_CREATE_FLG_FULL_DECODER; +#ifdef OCSD_OPFLG_N_UNCOND_DIR_BR_CHK + decode_flags |= OCSD_OPFLG_N_UNCOND_DIR_BR_CHK | OCSD_OPFLG_CHK_RANGE_CONTINUE | + ETM4_OPFLG_PKTDEC_AA64_OPCODE_CHK; +#endif if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, + decode_flags, trace_config, &csid)) return -1; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 40f047baef81..0bf9e5c27b59 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2490,12 +2490,6 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) /* Ignore return value */ cs_etm__process_traceid_queue(etmq, tidq); - - /* - * Generate an instruction sample with the remaining - * branchstack entries. - */ - cs_etm__flush(etmq, tidq); } } @@ -2638,7 +2632,7 @@ static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) while (1) { if (!etm->heap.heap_cnt) - goto out; + break; /* Take the entry at the top of the min heap */ cs_queue_nr = etm->heap.heap_array[0].queue_nr; @@ -2721,6 +2715,23 @@ refetch: ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } + for (i = 0; i < etm->queues.nr_queues; i++) { + struct int_node *inode; + + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + intlist__for_each_entry(inode, etmq->traceid_queues_list) { + int idx = (int)(intptr_t)inode->priv; + + /* Flush any remaining branch stack entries */ + tidq = etmq->traceid_queues[idx]; + ret = cs_etm__end_block(etmq, tidq); + if (ret) + return ret; + } + } out: return ret; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 021e9b1d5cc5..f0599c61fab4 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -36,7 +36,7 @@ #include "util/sample.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #define pr_N(n, fmt, ...) \ diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 20bfb0884e9e..8304cd2d4a9c 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -28,7 +28,7 @@ #include "util/tool.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct convert_json { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d633d15329fa..995f6bb05b5f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -27,7 +27,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #else #define LIBTRACEEVENT_VERSION 0 #endif diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h index ad6422c3f8ca..a52d69932815 100644 --- a/tools/perf/util/debuginfo.h +++ b/tools/perf/util/debuginfo.h @@ -5,7 +5,7 @@ #include <errno.h> #include <linux/compiler.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" @@ -25,7 +25,7 @@ void debuginfo__delete(struct debuginfo *dbg); int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, bool adjust_offset); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ /* dummy debug information structure */ struct debuginfo { @@ -49,7 +49,7 @@ static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unuse return -EINVAL; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_DEBUGINFOD_SUPPORT int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index f05ba7739c1e..41a2b08670dc 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -18,6 +18,7 @@ #include "disasm.h" #include "disasm_bpf.h" #include "dso.h" +#include "dwarf-regs.h" #include "env.h" #include "evsel.h" #include "map.h" @@ -151,14 +152,14 @@ static struct arch architectures[] = { .memory_ref_char = '(', .imm_char = '$', }, -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT .update_insn_state = update_insn_state_x86, #endif }, { .name = "powerpc", .init = powerpc__annotate_init, -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT .update_insn_state = update_insn_state_powerpc, #endif }, @@ -1423,6 +1424,15 @@ err: } #endif +#if !defined(HAVE_LIBCAPSTONE_SUPPORT) || !defined(HAVE_LIBLLVM_SUPPORT) +static void symbol__disassembler_missing(const char *disassembler, const char *filename, + struct symbol *sym) +{ + pr_debug("The %s disassembler isn't linked in for %s in %s\n", + disassembler, sym->name, filename); +} +#endif + #ifdef HAVE_LIBCAPSTONE_SUPPORT static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, struct annotate_args *args, u64 addr) @@ -1573,7 +1583,7 @@ static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *s dl = disasm_line__new(args); if (dl == NULL) - goto err; + break; annotation_line__add(&dl->al, ¬es->src->source); @@ -1603,18 +1613,6 @@ out: err: if (fd >= 0) close(fd); - if (needs_cs_close) { - struct disasm_line *tmp; - - /* - * It probably failed in the middle of the above loop. - * Release any resources it might add. - */ - list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { - list_del(&dl->al.node); - free(dl); - } - } count = -1; goto out; } @@ -1627,12 +1625,12 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, u64 start = map__rip_2objdump(map, sym->start); u64 len; u64 offset; - int i, count; + int i, count, free_count; bool is_64bit = false; bool needs_cs_close = false; u8 *buf = NULL; csh handle; - cs_insn *insn; + cs_insn *insn = NULL; char disasm_buf[512]; struct disasm_line *dl; @@ -1664,7 +1662,7 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, needs_cs_close = true; - count = cs_disasm(handle, buf, len, start, len, &insn); + free_count = count = cs_disasm(handle, buf, len, start, len, &insn); for (i = 0, offset = 0; i < count; i++) { int printed; @@ -1702,8 +1700,11 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, } out: - if (needs_cs_close) + if (needs_cs_close) { cs_close(&handle); + if (free_count > 0) + cs_free(insn, free_count); + } free(buf); return count < 0 ? count : 0; @@ -1717,13 +1718,27 @@ err: */ list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { list_del(&dl->al.node); - free(dl); + disasm_line__free(dl); } } count = -1; goto out; } -#endif +#else // HAVE_LIBCAPSTONE_SUPPORT +static int symbol__disassemble_capstone(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("capstone", filename, sym); + return -1; +} + +static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("capstone powerpc", filename, sym); + return -1; +} +#endif // HAVE_LIBCAPSTONE_SUPPORT static int symbol__disassemble_raw(char *filename, struct symbol *sym, struct annotate_args *args) @@ -1782,7 +1797,7 @@ static int symbol__disassemble_raw(char *filename, struct symbol *sym, sprintf(args->line, "%x", line[i]); dl = disasm_line__new(args); if (dl == NULL) - goto err; + break; annotation_line__add(&dl->al, ¬es->src->source); offset += 4; @@ -1991,7 +2006,14 @@ err: free(line_storage); return ret; } -#endif +#else // HAVE_LIBLLVM_SUPPORT +static int symbol__disassemble_llvm(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("LLVM", filename, sym); + return -1; +} +#endif // HAVE_LIBLLVM_SUPPORT /* * Possibly create a new version of line with tabs expanded. Returns the @@ -2053,17 +2075,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) return new_line; } -int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, + struct annotate_args *args) { struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; FILE *file; - char symfs_filename[PATH_MAX]; - struct kcore_extract kce; - bool delete_extract = false; - bool decomp = false; int lineno = 0; char *fileloc = NULL; int nline; @@ -2078,77 +2097,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) NULL, }; struct child_process objdump_process; - int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); - - if (err) - return err; - - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, - symfs_filename, sym->name, map__unmap_ip(map, sym->start), - map__unmap_ip(map, sym->end)); - - pr_debug("annotating [%p] %30s : [%p] %30s\n", - dso, dso__long_name(dso), sym, sym->name); - - if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { - return symbol__disassemble_bpf(sym, args); - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { - return symbol__disassemble_bpf_image(sym, args); - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { - return -1; - } else if (dso__is_kcore(dso)) { - kce.kcore_filename = symfs_filename; - kce.addr = map__rip_2objdump(map, sym->start); - kce.offs = sym->start; - kce.len = sym->end - sym->start; - if (!kcore_extract__create(&kce)) { - delete_extract = true; - strlcpy(symfs_filename, kce.extract_filename, - sizeof(symfs_filename)); - } - } else if (dso__needs_decompress(dso)) { - char tmp[KMOD_DECOMP_LEN]; - - if (dso__decompress_kmodule_path(dso, symfs_filename, - tmp, sizeof(tmp)) < 0) - return -1; - - decomp = true; - strcpy(symfs_filename, tmp); - } - - /* - * For powerpc data type profiling, use the dso__data_read_offset - * to read raw instruction directly and interpret the binary code - * to understand instructions and register fields. For sort keys as - * type and typeoff, disassemble to mnemonic notation is - * not required in case of powerpc. - */ - if (arch__is(args->arch, "powerpc")) { - extern const char *sort_order; - - if (sort_order && !strstr(sort_order, "sym")) { - err = symbol__disassemble_raw(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#ifdef HAVE_LIBCAPSTONE_SUPPORT - err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif - } - } - -#ifdef HAVE_LIBLLVM_SUPPORT - err = symbol__disassemble_llvm(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - err = symbol__disassemble_capstone(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif + int err; err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 @@ -2171,13 +2120,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (err < 0) { pr_err("Failure allocating memory for the command to run\n"); - goto out_remove_tmp; + return err; } pr_debug("Executing: %s\n", command); objdump_argv[2] = command; - objdump_argv[4] = symfs_filename; + objdump_argv[4] = filename; /* Create a pipe to read from for stdout */ memset(&objdump_process, 0, sizeof(objdump_process)); @@ -2215,8 +2164,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) break; /* Skip lines containing "filename:" */ - match = strstr(line, symfs_filename); - if (match && match[strlen(symfs_filename)] == ':') + match = strstr(line, filename); + if (match && match[strlen(filename)] == ':') continue; expanded_line = strim(line); @@ -2261,7 +2210,150 @@ out_close_stdout: out_free_command: free(command); + return err; +} + +static int annotation_options__init_disassemblers(struct annotation_options *options) +{ + char *disassembler; + + if (options->disassemblers_str == NULL) { + const char *default_disassemblers_str = +#ifdef HAVE_LIBLLVM_SUPPORT + "llvm," +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + "capstone," +#endif + "objdump"; + + options->disassemblers_str = strdup(default_disassemblers_str); + if (!options->disassemblers_str) + goto out_enomem; + } + + disassembler = strdup(options->disassemblers_str); + if (disassembler == NULL) + goto out_enomem; + + while (1) { + char *comma = strchr(disassembler, ','); + + if (comma != NULL) + *comma = '\0'; + + options->disassemblers[options->nr_disassemblers++] = strim(disassembler); + + if (comma == NULL) + break; + + disassembler = comma + 1; + + if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { + pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", + MAX_DISASSEMBLERS, disassembler); + break; + } + } + + return 0; + +out_enomem: + pr_err("Not enough memory for annotate.disassemblers\n"); + return -1; +} + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +{ + struct annotation_options *options = args->options; + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + char symfs_filename[PATH_MAX]; + bool delete_extract = false; + struct kcore_extract kce; + const char *disassembler; + bool decomp = false; + int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); + + if (err) + return err; + + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, + symfs_filename, sym->name, map__unmap_ip(map, sym->start), + map__unmap_ip(map, sym->end)); + + pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { + return symbol__disassemble_bpf(sym, args); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { + return -1; + } else if (dso__is_kcore(dso)) { + kce.addr = map__rip_2objdump(map, sym->start); + kce.kcore_filename = symfs_filename; + kce.len = sym->end - sym->start; + kce.offs = sym->start; + + if (!kcore_extract__create(&kce)) { + delete_extract = true; + strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); + } + } else if (dso__needs_decompress(dso)) { + char tmp[KMOD_DECOMP_LEN]; + + if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) + return -1; + + decomp = true; + strcpy(symfs_filename, tmp); + } + + /* + * For powerpc data type profiling, use the dso__data_read_offset to + * read raw instruction directly and interpret the binary code to + * understand instructions and register fields. For sort keys as type + * and typeoff, disassemble to mnemonic notation is not required in + * case of powerpc. + */ + if (arch__is(args->arch, "powerpc")) { + extern const char *sort_order; + + if (sort_order && !strstr(sort_order, "sym")) { + err = symbol__disassemble_raw(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + + err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + } + } + + err = annotation_options__init_disassemblers(options); + if (err) + goto out_remove_tmp; + + err = -1; + + for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { + disassembler = options->disassemblers[i]; + + if (!strcmp(disassembler, "llvm")) + err = symbol__disassemble_llvm(symfs_filename, sym, args); + else if (!strcmp(disassembler, "capstone")) + err = symbol__disassemble_capstone(symfs_filename, sym, args); + else if (!strcmp(disassembler, "objdump")) + err = symbol__disassemble_objdump(symfs_filename, sym, args); + else + pr_debug("Unknown disassembler %s, skipping...\n", disassembler); + } + + if (err == 0) { + pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n", + disassembler, options->disassemblers_str); + } out_remove_tmp: if (decomp) unlink(symfs_filename); diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h index f56beedeb9da..c135db2416b5 100644 --- a/tools/perf/util/disasm.h +++ b/tools/perf/util/disasm.h @@ -4,7 +4,7 @@ #include "map_symbol.h" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #endif @@ -39,11 +39,15 @@ struct arch { char memory_ref_char; char imm_char; } objdump; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT void (*update_insn_state)(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl); #endif + /** @e_machine: ELF machine associated with arch. */ + unsigned int e_machine; + /** @e_flags: Optional ELF flags associated with arch. */ + unsigned int e_flags; }; struct ins { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 92eb9c8dc3e5..559c953ca172 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1182,7 +1182,6 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } -#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT) static int reg_from_dwarf_op(Dwarf_Op *op) { switch (op->atom) { @@ -1245,9 +1244,7 @@ static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) } return true; } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -1697,9 +1694,7 @@ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types) die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem); } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ -#ifdef HAVE_DWARF_CFI_SUPPORT /** * die_get_cfa - Get frame base information * @dwarf: a Dwarf info @@ -1732,7 +1727,6 @@ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset) } return -1; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ /* * die_has_loclist - Check if DW_AT_location of @vr_die is a location list diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index bd7505812569..892c8c5c23fc 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -9,7 +9,6 @@ #include <elfutils/libdw.h> #include <elfutils/libdwfl.h> #include <elfutils/version.h> -#include <errno.h> struct strbuf; @@ -157,8 +156,6 @@ Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_m /* Return type info where the pointer and offset point to */ Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem); -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT - /* Get byte offset range of given variable DIE */ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); @@ -177,58 +174,7 @@ void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); /* Save all global variables in this CU */ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types); -#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) -{ - return -ENOTSUP; -} - -static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr pc __maybe_unused, - int reg __maybe_unused, - int *poffset __maybe_unused, - bool is_fbreg __maybe_unused, - Dwarf_Die *die_mem __maybe_unused) -{ - return NULL; -} - -static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr addr __maybe_unused, - Dwarf_Die *die_mem __maybe_unused, - int *offset __maybe_unused) -{ - return NULL; -} - -static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -static inline void die_collect_global_vars(Dwarf_Die *cu_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -#ifdef HAVE_DWARF_CFI_SUPPORT - /* Get the frame base information from CFA */ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset); -#else /* HAVE_DWARF_CFI_SUPPORT */ - -static inline int die_get_cfa(Dwarf *dwarf __maybe_unused, u64 pc __maybe_unused, - int *preg __maybe_unused, int *poffset __maybe_unused) -{ - return -1; -} - -#endif /* HAVE_DWARF_CFI_SUPPORT */ - #endif /* _DWARF_AUX_H */ diff --git a/tools/perf/arch/csky/util/dwarf-regs.c b/tools/perf/util/dwarf-regs-csky.c index ca86ecaeacbb..d38ef1f07f3e 100644 --- a/tools/perf/arch/csky/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs-csky.c @@ -5,9 +5,8 @@ #include <stddef.h> #include <dwarf-regs.h> -#if defined(__CSKYABIV2__) -#define CSKY_MAX_REGS 73 -const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { +#define CSKY_ABIV2_MAX_REGS 73 +const char *csky_dwarf_regs_table_abiv2[CSKY_ABIV2_MAX_REGS] = { /* r0 ~ r8 */ "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3", /* r9 ~ r15 */ @@ -26,9 +25,9 @@ const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%epc", }; -#else -#define CSKY_MAX_REGS 57 -const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { + +#define CSKY_ABIV1_MAX_REGS 57 +const char *csky_dwarf_regs_table_abiv1[CSKY_ABIV1_MAX_REGS] = { /* r0 ~ r8 */ "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", /* r9 ~ r15 */ @@ -41,9 +40,11 @@ const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%epc", }; -#endif -const char *get_arch_regstr(unsigned int n) +const char *get_csky_regstr(unsigned int n, unsigned int flags) { - return (n < CSKY_MAX_REGS) ? csky_dwarf_regs_table[n] : NULL; + if (flags & EF_CSKY_ABIV2) + return (n < CSKY_ABIV2_MAX_REGS) ? csky_dwarf_regs_table_abiv2[n] : NULL; + + return (n < CSKY_ABIV1_MAX_REGS) ? csky_dwarf_regs_table_abiv1[n] : NULL; } diff --git a/tools/perf/util/dwarf-regs-powerpc.c b/tools/perf/util/dwarf-regs-powerpc.c new file mode 100644 index 000000000000..caf77a234c78 --- /dev/null +++ b/tools/perf/util/dwarf-regs-powerpc.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2010 Ian Munsie, IBM Corporation. + */ + +#include <dwarf-regs.h> + +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define PPC_RA(a) (((a) >> 16) & 0x1f) +#define PPC_RT(t) (((t) >> 21) & 0x1f) +#define PPC_RB(b) (((b) >> 11) & 0x1f) +#define PPC_D(D) ((D) & 0xfffe) +#define PPC_DS(DS) ((DS) & 0xfffc) +#define OP_LD 58 +#define OP_STD 62 + +static int get_source_reg(u32 raw_insn) +{ + return PPC_RA(raw_insn); +} + +static int get_target_reg(u32 raw_insn) +{ + return PPC_RT(raw_insn); +} + +static int get_offset_opcode(u32 raw_insn) +{ + int opcode = PPC_OP(raw_insn); + + /* DS- form */ + if ((opcode == OP_LD) || (opcode == OP_STD)) + return PPC_DS(raw_insn); + else + return PPC_D(raw_insn); +} + +/* + * Fills the required fields for op_loc depending on if it + * is a source or target. + * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT + * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT + * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT + */ +void get_powerpc_regs(u32 raw_insn, int is_source, + struct annotated_op_loc *op_loc) +{ + if (is_source) + op_loc->reg1 = get_source_reg(raw_insn); + else + op_loc->reg1 = get_target_reg(raw_insn); + + if (op_loc->multi_regs) + op_loc->reg2 = PPC_RB(raw_insn); + + /* TODO: Implement offset handling for X Form */ + if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) + op_loc->offset = get_offset_opcode(raw_insn); +} diff --git a/tools/perf/util/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-x86.c new file mode 100644 index 000000000000..7a55c65e8da6 --- /dev/null +++ b/tools/perf/util/dwarf-regs-x86.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * Extracted from probe-finder.c + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/kernel.h> /* for ARRAY_SIZE */ +#include <dwarf-regs.h> + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_x86_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 5b7f86c0063f..28a1cfdf26d4 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -13,14 +13,6 @@ #include <errno.h> #include <linux/kernel.h> -#ifndef EM_AARCH64 -#define EM_AARCH64 183 /* ARM 64 bit */ -#endif - -#ifndef EM_LOONGARCH -#define EM_LOONGARCH 258 /* LoongArch */ -#endif - /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -28,6 +20,7 @@ #include "../arch/arm64/include/dwarf-regs-table.h" #include "../arch/sh/include/dwarf-regs-table.h" #include "../arch/powerpc/include/dwarf-regs-table.h" +#include "../arch/riscv/include/dwarf-regs-table.h" #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" @@ -37,11 +30,13 @@ #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) /* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine) +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags) { + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ - return get_arch_regstr(n); case EM_386: return __get_dwarf_regstr(x86_32_regstr_tbl, n); case EM_X86_64: @@ -50,6 +45,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(arm_regstr_tbl, n); case EM_AARCH64: return __get_dwarf_regstr(aarch64_regstr_tbl, n); + case EM_CSKY: + return get_csky_regstr(n, flags); case EM_SH: return __get_dwarf_regstr(sh_regstr_tbl, n); case EM_S390: @@ -57,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) case EM_PPC: case EM_PPC64: return __get_dwarf_regstr(powerpc_regstr_tbl, n); + case EM_RISCV: + return __get_dwarf_regstr(riscv_regstr_tbl, n); case EM_SPARC: case EM_SPARCV9: return __get_dwarf_regstr(sparc_regstr_tbl, n); @@ -72,13 +71,15 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return NULL; } +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 __weak int get_arch_regnum(const char *name __maybe_unused) { return -ENOTSUP; } +#endif /* Return DWARF register number from architecture register name */ -int get_dwarf_regnum(const char *name, unsigned int machine) +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags __maybe_unused) { char *regname = strdup(name); int reg = -1; @@ -92,10 +93,21 @@ int get_dwarf_regnum(const char *name, unsigned int machine) if (p) *p = '\0'; + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 + case EM_HOST: reg = get_arch_regnum(regname); break; +#endif + case EM_X86_64: + fallthrough; + case EM_386: + reg = get_x86_regnum(regname); + break; default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 1edbccfc3281..e2843ca2edd9 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -5,12 +5,14 @@ #include "util/header.h" #include "linux/compiler.h" #include <linux/ctype.h> +#include <linux/string.h> #include <linux/zalloc.h> #include "cgroup.h" #include <errno.h> #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "pmu.h" #include "pmus.h" #include "strbuf.h" #include "trace/beauty/beauty.h" @@ -372,7 +374,8 @@ error: int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; - int err = get_cpuid(cpuid, sizeof(cpuid)); + struct perf_cpu cpu = {-1}; + int err = get_cpuid(cpuid, sizeof(cpuid), cpu); if (err) return err; @@ -639,3 +642,25 @@ void perf_env__find_br_cntr_info(struct perf_env *env, env->pmu_caps->br_cntr_width; } } + +bool perf_env__is_x86_amd_cpu(struct perf_env *env) +{ + static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_amd == 0) + is_amd = env->cpuid && strstarts(env->cpuid, "AuthenticAMD") ? 1 : -1; + + return is_amd >= 1 ? true : false; +} + +bool x86__is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_amd; + + perf_env__cpuid(&env); + is_amd = perf_env__is_x86_amd_cpu(&env); + perf_env__exit(&env); + + return is_amd; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 51b36c36019b..ae604c4edbb7 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -195,4 +195,8 @@ bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); void perf_env__find_br_cntr_info(struct perf_env *env, unsigned int *nr, unsigned int *width); + +bool x86__is_amd_cpu(void); +bool perf_env__is_x86_amd_cpu(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f8742e6230a5..2744c54f404e 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -66,6 +66,7 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13, PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14, + PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15, }; #define PERF_IP_FLAG_CHARS "bcrosyiABExghDt" diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f14b7e6ff1dc..f0dd174e2deb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -48,6 +48,7 @@ #include <sys/mman.h> #include <sys/prctl.h> #include <sys/timerfd.h> +#include <sys/wait.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -319,62 +320,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) } #endif -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - struct evsel *evsel, *n; - LIST_HEAD(head); - size_t i; - - for (i = 0; i < nr_attrs; i++) { - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - return -1; -} - -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - size_t i; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - return evlist__add_attrs(evlist, attrs, nr_attrs); -} - -__weak int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return __evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && - (int)evsel->core.attr.config == id) - return evsel; - } - - return NULL; -} - struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1199,11 +1144,6 @@ int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) return ret; } -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) -{ - return evlist__set_tp_filter_pids(evlist, 1, &pid); -} - int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); @@ -1484,6 +1424,8 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const int child_ready_pipe[2], go_pipe[2]; char bf; + evlist->workload.cork_fd = -1; + if (pipe(child_ready_pipe) < 0) { perror("failed to create 'ready' pipe"); return -1; @@ -1536,7 +1478,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() - * here. + * here (See evlist__cancel_workload()). */ if (ret != 1) { if (ret == -1) @@ -1600,7 +1542,7 @@ out_close_ready_pipe: int evlist__start_workload(struct evlist *evlist) { - if (evlist->workload.cork_fd > 0) { + if (evlist->workload.cork_fd >= 0) { char bf = 0; int ret; /* @@ -1611,12 +1553,24 @@ int evlist__start_workload(struct evlist *evlist) perror("unable to write to pipe"); close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; return ret; } return 0; } +void evlist__cancel_workload(struct evlist *evlist) +{ + int status; + + if (evlist->workload.cork_fd >= 0) { + close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; + waitpid(evlist->workload.pid, &status, WNOHANG); + } +} + int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = evlist__event2evsel(evlist, event); @@ -2619,7 +2573,8 @@ void evlist__uniquify_name(struct evlist *evlist) else attributes = empty_attributes; - if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { + if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "", + pos->name, attributes + 1)) { free(pos->name); pos->name = new_name; } else { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bcc1c6984bb5..adddb1db1ad2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -102,18 +102,6 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); - -int __evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs); - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs); - -#define evlist__add_default_attrs(evlist, array) \ - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs); int evlist__add_dummy(struct evlist *evlist); @@ -144,7 +132,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) int evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); int evlist__append_tp_filter(struct evlist *evlist, const char *filter); @@ -152,7 +139,6 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); @@ -186,6 +172,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); int evlist__start_workload(struct evlist *evlist); +void evlist__cancel_workload(struct evlist *evlist); struct option; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbf9c8cee3c5..d22c5df1701e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -5,12 +5,16 @@ * Parts came from builtin-{top,stat,record}.c, see those files for further * copyright notes. */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__ #include <byteswap.h> #include <errno.h> #include <inttypes.h> #include <linux/bitops.h> -#include <api/io.h> #include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <linux/hw_breakpoint.h> @@ -20,6 +24,7 @@ #include <linux/zalloc.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <dirent.h> #include <stdlib.h> @@ -51,6 +56,8 @@ #include "off_cpu.h" #include "pmu.h" #include "pmus.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "rlimit.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" @@ -64,46 +71,135 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct perf_missing_features perf_missing_features; static clockid_t clockid; -static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = { - NULL, - "duration_time", - "user_time", - "system_time", -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) { - if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX) - return perf_tool_event__tool_names[ev]; - - return NULL; + return 0; } -enum perf_tool_event perf_tool_event__from_str(const char *str) +static bool test_attr__enabled(void) { - int i; + static bool test_attr__enabled; + static bool test_attr__enabled_tested; + + if (!test_attr__enabled_tested) { + char *dir = getenv("PERF_TEST_ATTR"); - perf_tool_event__for_each_event(i) { - if (!strcmp(str, perf_tool_event__tool_names[i])) - return i; + test_attr__enabled = (dir != NULL); + test_attr__enabled_tested = true; } - return PERF_TOOL_NONE; + return test_attr__enabled; } +#define __WRITE_ASS(str, fmt, data) \ +do { \ + if (fprintf(file, #str "=%"fmt "\n", data) < 0) { \ + perror("test attr - failed to write event file"); \ + fclose(file); \ + return -1; \ + } \ +} while (0) -static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) +#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) + +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) { + FILE *file; + char path[PATH_MAX]; + char *dir = getenv("PERF_TEST_ATTR"); + + snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, + attr->type, attr->config, fd); + + file = fopen(path, "w+"); + if (!file) { + perror("test attr - failed to open event file"); + return -1; + } + + if (fprintf(file, "[event-%d-%llu-%d]\n", + attr->type, attr->config, fd) < 0) { + perror("test attr - failed to write event file"); + fclose(file); + return -1; + } + + /* syscall arguments */ + __WRITE_ASS(fd, "d", fd); + __WRITE_ASS(group_fd, "d", group_fd); + __WRITE_ASS(cpu, "d", cpu.cpu); + __WRITE_ASS(pid, "d", pid); + __WRITE_ASS(flags, "lu", flags); + + /* struct perf_event_attr */ + WRITE_ASS(type, PRIu32); + WRITE_ASS(size, PRIu32); + WRITE_ASS(config, "llu"); + WRITE_ASS(sample_period, "llu"); + WRITE_ASS(sample_type, "llu"); + WRITE_ASS(read_format, "llu"); + WRITE_ASS(disabled, "d"); + WRITE_ASS(inherit, "d"); + WRITE_ASS(pinned, "d"); + WRITE_ASS(exclusive, "d"); + WRITE_ASS(exclude_user, "d"); + WRITE_ASS(exclude_kernel, "d"); + WRITE_ASS(exclude_hv, "d"); + WRITE_ASS(exclude_idle, "d"); + WRITE_ASS(mmap, "d"); + WRITE_ASS(comm, "d"); + WRITE_ASS(freq, "d"); + WRITE_ASS(inherit_stat, "d"); + WRITE_ASS(enable_on_exec, "d"); + WRITE_ASS(task, "d"); + WRITE_ASS(watermark, "d"); + WRITE_ASS(precise_ip, "d"); + WRITE_ASS(mmap_data, "d"); + WRITE_ASS(sample_id_all, "d"); + WRITE_ASS(exclude_host, "d"); + WRITE_ASS(exclude_guest, "d"); + WRITE_ASS(exclude_callchain_kernel, "d"); + WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); + WRITE_ASS(wakeup_events, PRIu32); + WRITE_ASS(bp_type, PRIu32); + WRITE_ASS(config1, "llu"); + WRITE_ASS(config2, "llu"); + WRITE_ASS(branch_sample_type, "llu"); + WRITE_ASS(sample_regs_user, "llu"); + WRITE_ASS(sample_stack_user, PRIu32); + + fclose(file); return 0; } -void __weak test_attr__ready(void) { } +#undef __WRITE_ASS +#undef WRITE_ASS + +static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) +{ + int errno_saved = errno; + + if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { + pr_err("test attr FAILED"); + exit(128); + } + + errno = errno_saved; +} static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) { @@ -296,9 +392,9 @@ void evsel__init(struct evsel *evsel, evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; - evsel->pmu_name = NULL; evsel->group_pmu_name = NULL; evsel->skippable = false; + evsel->alternate_hw_config = PERF_COUNT_HW_MAX; } struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -393,11 +489,6 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->group_name == NULL) goto out_err; } - if (orig->pmu_name) { - evsel->pmu_name = strdup(orig->pmu_name); - if (evsel->pmu_name == NULL) - goto out_err; - } if (orig->group_pmu_name) { evsel->group_pmu_name = strdup(orig->group_pmu_name); if (evsel->group_pmu_name == NULL) @@ -421,7 +512,6 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.leader = orig->core.leader; evsel->max_events = orig->max_events; - evsel->tool_event = orig->tool_event; free((char *)evsel->unit); evsel->unit = strdup(orig->unit); if (evsel->unit == NULL) @@ -445,6 +535,8 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; + evsel->alternate_hw_config = orig->alternate_hw_config; + return evsel; out_err: @@ -548,7 +640,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; - bool exclude_guest_default = false; #define MOD_PRINT(context, mod) do { \ if (!attr->exclude_##context) { \ @@ -560,17 +651,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) MOD_PRINT(kernel, 'k'); MOD_PRINT(user, 'u'); MOD_PRINT(hv, 'h'); - exclude_guest_default = true; } if (attr->precise_ip) { if (!colon) colon = ++r; r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); - exclude_guest_default = true; } - if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { + if (attr->exclude_host || attr->exclude_guest) { MOD_PRINT(host, 'H'); MOD_PRINT(guest, 'G'); } @@ -617,11 +706,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } -static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) -{ - return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); -} - static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) { int r; @@ -772,10 +856,7 @@ const char *evsel__name(struct evsel *evsel) break; case PERF_TYPE_SOFTWARE: - if (evsel__is_tool(evsel)) - evsel__tool_name(evsel__tool_event(evsel), bf, sizeof(bf)); - else - evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -786,6 +867,10 @@ const char *evsel__name(struct evsel *evsel) evsel__bp_name(evsel, bf, sizeof(bf)); break; + case PERF_PMU_TYPE_TOOL: + scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel)); + break; + default: scnprintf(bf, sizeof(bf), "unknown attr type: %d", evsel->core.attr.type); @@ -811,7 +896,7 @@ const char *evsel__metric_id(const struct evsel *evsel) return evsel->metric_id; if (evsel__is_tool(evsel)) - return perf_tool_event__to_str(evsel__tool_event(evsel)); + return evsel__tool_pmu_event_name(evsel); return "unknown"; } @@ -862,7 +947,6 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o { bool function = evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - const char *arch = perf_env__arch(evsel__env(evsel)); evsel__set_sample_bit(evsel, CALLCHAIN); @@ -893,6 +977,8 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { + const char *arch = perf_env__arch(evsel__env(evsel)); + evsel__set_sample_bit(evsel, REGS_USER); evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && @@ -1150,7 +1236,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; - attr->inherit = !opts->no_inherit; + attr->inherit = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; attr->read_format = PERF_FORMAT_LOST; @@ -1172,7 +1258,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (leader->core.nr_members > 1) { attr->read_format |= PERF_FORMAT_GROUP; - attr->inherit = 0; + } + + /* + * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format + */ + if (attr->inherit) { + evsel__set_sample_bit(evsel, TID); + evsel->core.attr.read_format |= + PERF_FORMAT_ID; } } @@ -1494,7 +1588,6 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->filter); - zfree(&evsel->pmu_name); zfree(&evsel->group_pmu_name); zfree(&evsel->unit); zfree(&evsel->metric_id); @@ -1503,8 +1596,8 @@ void evsel__exit(struct evsel *evsel) evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); - if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || - evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) + if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) xyarray__delete(evsel->start_times); } @@ -1684,171 +1777,31 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) return evsel__process_group_data(leader, cpu_map_idx, thread, data); } -static bool read_until_char(struct io *io, char e) -{ - int c; - - do { - c = io__get_char(io); - if (c == -1) - return false; - } while (c != e); - return true; -} - -static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) -{ - char buf[256]; - struct io io; - int i; - - io__init(&io, fd, buf, sizeof(buf)); - - /* Skip lines to relevant CPU. */ - for (i = -1; i < cpu.cpu; i++) { - if (!read_until_char(&io, '\n')) - return -EINVAL; - } - /* Skip to "cpu". */ - if (io__get_char(&io) != 'c') return -EINVAL; - if (io__get_char(&io) != 'p') return -EINVAL; - if (io__get_char(&io) != 'u') return -EINVAL; - - /* Skip N of cpuN. */ - if (!read_until_char(&io, ' ')) - return -EINVAL; - - i = 1; - while (true) { - if (io__get_dec(&io, val) != ' ') - break; - if (field == i) - return 0; - i++; - } - return -EINVAL; -} - -static int read_pid_stat_field(int fd, int field, __u64 *val) +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) { - char buf[256]; - struct io io; - int c, i; - io__init(&io, fd, buf, sizeof(buf)); - if (io__get_dec(&io, val) != ' ') - return -EINVAL; - if (field == 1) - return 0; - - /* Skip comm. */ - if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) - return -EINVAL; - if (field == 2) - return -EINVAL; /* String can't be returned. */ - - /* Skip state */ - if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) - return -EINVAL; - if (field == 3) - return -EINVAL; /* String can't be returned. */ - - /* Loop over numeric fields*/ - if (io__get_char(&io) != ' ') - return -EINVAL; + u32 e_type = evsel->core.attr.type; + u64 e_config = evsel->core.attr.config; - i = 4; - while (true) { - c = io__get_dec(&io, val); - if (c == -1) - return -EINVAL; - if (c == -2) { - /* Assume a -ve was read */ - c = io__get_dec(&io, val); - *val *= -1; - } - if (c != ' ') - return -EINVAL; - if (field == i) - return 0; - i++; + if (e_type != type) { + return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && + evsel->alternate_hw_config == config; } - return -EINVAL; -} - -static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread) -{ - __u64 *start_time, cur_time, delta_start; - int fd, err = 0; - struct perf_counts_values *count; - bool adjust = false; - count = perf_counts(evsel->counts, cpu_map_idx, thread); + if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) + e_config &= PERF_HW_EVENT_MASK; - switch (evsel__tool_event(evsel)) { - case PERF_TOOL_DURATION_TIME: - /* - * Pretend duration_time is only on the first CPU and thread, or - * else aggregation will scale duration_time by the number of - * CPUs/threads. - */ - start_time = &evsel->start_time; - if (cpu_map_idx == 0 && thread == 0) - cur_time = rdclock(); - else - cur_time = *start_time; - break; - case PERF_TOOL_USER_TIME: - case PERF_TOOL_SYSTEM_TIME: { - bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - - start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); - fd = FD(evsel, cpu_map_idx, thread); - lseek(fd, SEEK_SET, 0); - if (evsel->pid_stat) { - /* The event exists solely on 1 CPU. */ - if (cpu_map_idx == 0) - err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); - else - cur_time = 0; - } else { - /* The event is for all threads. */ - if (thread == 0) { - struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, - cpu_map_idx); - - err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); - } else { - cur_time = 0; - } - } - adjust = true; - break; - } - case PERF_TOOL_NONE: - case PERF_TOOL_MAX: - default: - err = -EINVAL; - } - if (err) - return err; - - delta_start = cur_time - *start_time; - if (adjust) { - __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); - - delta_start *= 1000000000 / ticks_per_sec; - } - count->val = delta_start; - count->ena = count->run = delta_start; - count->lost = 0; - return 0; + return e_config == config; } int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { if (evsel__is_tool(evsel)) - return evsel__read_tool(evsel, cpu_map_idx, thread); + return evsel__tool_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_hwmon(evsel)) + return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); if (evsel__is_retire_lat(evsel)) return evsel__read_retire_lat(evsel, cpu_map_idx, thread); @@ -2042,6 +1995,7 @@ static struct perf_thread_map *empty_thread_map; static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { + int ret = 0; int nthreads = perf_thread_map__nr(threads); if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || @@ -2072,23 +2026,21 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; - if ((evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || - evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) && - !evsel->start_times) { - evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64)); - if (!evsel->start_times) - return -ENOMEM; - } + if (evsel__is_tool(evsel)) + ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads); evsel->open_flags = PERF_FLAG_FD_CLOEXEC; if (evsel->cgrp) evsel->open_flags |= PERF_FLAG_PID_CGROUP; - return 0; + return ret; } static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) + evsel->core.attr.inherit = 0; if (perf_missing_features.branch_counters) evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) @@ -2138,120 +2090,346 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, return err; } -bool evsel__detect_missing_features(struct evsel *evsel) +static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags) +{ + int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + + if (fd < 0) { + attr->exclude_kernel = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_hv = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_guest = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + attr->exclude_kernel = 0; + attr->exclude_guest = 0; + attr->exclude_hv = 0; + + return fd >= 0; +} + +static void evsel__detect_missing_pmu_features(struct evsel *evsel) { + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + }; + struct perf_pmu *pmu = evsel->pmu; + int old_errno; + + old_errno = errno; + + if (pmu == NULL) + pmu = evsel->pmu = evsel__find_pmu(evsel); + + if (pmu == NULL || pmu->missing_features.checked) + goto out; + /* * Must probe features in the order they were added to the - * perf_event_attr interface. + * perf_event_attr interface. These are kernel core limitation but + * specific to PMUs with branch stack. So we can detect with the given + * hardware event and stop on the first one succeeded. */ - if (!perf_missing_features.branch_counters && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { - perf_missing_features.branch_counters = true; - pr_debug2("switching off branch counters support\n"); + + /* Please add new feature detection here. */ + + attr.exclude_guest = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + pmu->missing_features.exclude_guest = true; + pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name); + +found: + pmu->missing_features.checked = true; +out: + errno = old_errno; +} + +static void evsel__detect_missing_brstack_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + .sample_type = PERF_SAMPLE_BRANCH_STACK, + .sample_period = 1000, + }; + int old_errno; + + if (detection_done) + return; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are PMU specific limitation + * so we can detect with the given hardware event and stop on the + * first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.lbr_flags = true; + pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +found: + detection_done = true; + errno = old_errno; +} + +static bool evsel__detect_missing_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .disabled = 1, + }; + int old_errno; + + evsel__detect_missing_pmu_features(evsel); + + if (evsel__has_br_stack(evsel)) + evsel__detect_missing_brstack_features(evsel); + + if (detection_done) + goto check; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are kernel core limitation + * not PMU-specific so we can detect with a software event and + * stop on the first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.inherit = true; + attr.sample_type = PERF_SAMPLE_READ; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.inherit_sample_read = true; + pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n"); + attr.inherit = false; + attr.sample_type = 0; + + attr.read_format = PERF_FORMAT_LOST; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.read_lost = true; + pr_debug2("switching off PERF_FORMAT_LOST support\n"); + attr.read_format = 0; + + attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.cgroup = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support\n"); + attr.cgroup = 0; + + attr.aux_output = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.aux_output = true; + pr_debug2_peo("Kernel has no attr.aux_output support\n"); + attr.aux_output = 0; + + attr.bpf_event = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.bpf = true; + pr_debug2_peo("switching off bpf_event\n"); + attr.bpf_event = 0; + + attr.ksymbol = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.ksymbol = true; + pr_debug2_peo("switching off ksymbol\n"); + attr.ksymbol = 0; + + attr.write_backward = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.write_backward = true; + pr_debug2_peo("switching off write_backward\n"); + attr.write_backward = 0; + + attr.use_clockid = 1; + attr.clockid = CLOCK_MONOTONIC; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.clockid = true; + pr_debug2_peo("switching off clockid\n"); + attr.use_clockid = 0; + attr.clockid = 0; + + if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC)) + goto found; + perf_missing_features.cloexec = true; + pr_debug2_peo("switching off cloexec flag\n"); + + attr.mmap2 = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.mmap2 = true; + pr_debug2_peo("switching off mmap2\n"); + attr.mmap2 = 0; + + /* set this unconditionally? */ + perf_missing_features.sample_id_all = true; + pr_debug2_peo("switching off sample_id_all\n"); + + attr.inherit = 1; + attr.read_format = PERF_FORMAT_GROUP; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.group_read = true; + pr_debug2_peo("switching off group read\n"); + attr.inherit = 0; + attr.read_format = 0; + +found: + detection_done = true; + errno = old_errno; + +check: + if (evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && + perf_missing_features.inherit_sample_read) return true; - } else if (!perf_missing_features.read_lost && - (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { - perf_missing_features.read_lost = true; - pr_debug2("switching off PERF_FORMAT_LOST support\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + perf_missing_features.branch_counters) return true; - } else if (!perf_missing_features.weight_struct && - (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { - perf_missing_features.weight_struct = true; - pr_debug2("switching off weight struct support\n"); + + if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) && + perf_missing_features.read_lost) return true; - } else if (!perf_missing_features.code_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { - perf_missing_features.code_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.data_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { - perf_missing_features.data_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { - perf_missing_features.cgroup = true; - pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); - return false; - } else if (!perf_missing_features.branch_hw_idx && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { - perf_missing_features.branch_hw_idx = true; - pr_debug2("switching off branch HW index support\n"); + + if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && + perf_missing_features.weight_struct) return true; - } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { - perf_missing_features.aux_output = true; - pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); - return false; - } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { - perf_missing_features.bpf = true; - pr_debug2_peo("switching off bpf_event\n"); + + if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC && + !perf_missing_features.clockid) { + perf_missing_features.clockid_wrong = true; return true; - } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { - perf_missing_features.ksymbol = true; - pr_debug2_peo("switching off ksymbol\n"); + } + + if (evsel->core.attr.use_clockid && perf_missing_features.clockid) return true; - } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { - perf_missing_features.write_backward = true; - pr_debug2_peo("switching off write_backward\n"); - return false; - } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { - perf_missing_features.clockid_wrong = true; - pr_debug2_peo("switching off clockid\n"); + + if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) && + perf_missing_features.cloexec) return true; - } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { - perf_missing_features.clockid = true; - pr_debug2_peo("switching off use_clockid\n"); + + if (evsel->core.attr.mmap2 && perf_missing_features.mmap2) return true; - } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { - perf_missing_features.cloexec = true; - pr_debug2_peo("switching off cloexec flag\n"); + + if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES)) && + perf_missing_features.lbr_flags) return true; - } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { - perf_missing_features.mmap2 = true; - pr_debug2_peo("switching off mmap2\n"); + + if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && + perf_missing_features.group_read) return true; - } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { - if (evsel->pmu == NULL) - evsel->pmu = evsel__find_pmu(evsel); - - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } - if (evsel->exclude_GH) { - pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); - return false; - } - if (!perf_missing_features.exclude_guest) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - } + if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol) return true; - } else if (!perf_missing_features.sample_id_all) { - perf_missing_features.sample_id_all = true; - pr_debug2_peo("switching off sample_id_all\n"); + + if (evsel->core.attr.bpf_event && perf_missing_features.bpf) return true; - } else if (!perf_missing_features.lbr_flags && - (evsel->core.attr.branch_sample_type & - (PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS))) { - perf_missing_features.lbr_flags = true; - pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) && + perf_missing_features.branch_hw_idx) return true; - } else if (!perf_missing_features.group_read && - evsel->core.attr.inherit && - (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - evsel__is_group_leader(evsel)) { - perf_missing_features.group_read = true; - pr_debug2_peo("switching off group read\n"); + + if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all) + return true; + + return false; +} + +static bool evsel__handle_error_quirks(struct evsel *evsel, int error) +{ + /* + * AMD core PMU tries to forward events with precise_ip to IBS PMU + * implicitly. But IBS PMU has more restrictions so it can fail with + * supported event attributes. Let's forward it back to the core PMU + * by clearing precise_ip only if it's from precise_max (:P). + */ + if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && + evsel->core.attr.precise_ip && evsel->precise_max) { + evsel->core.attr.precise_ip = 0; + pr_debug2_peo("removing precise_ip on AMD\n"); + display_attr(&evsel->core.attr); return true; - } else { - return false; } + + return false; } static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -2262,13 +2440,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; - if (evsel__tool_event(evsel) == PERF_TOOL_DURATION_TIME) { - if (evsel->core.attr.sample_period) /* no sampling */ - return -EINVAL; - evsel->start_time = rdclock(); - return 0; - } - if (evsel__is_retire_lat(evsel)) return tpebs_start(evsel->evlist); @@ -2293,6 +2464,17 @@ fallback_missing_features: pr_debug3("Opening: %s\n", evsel__name(evsel)); display_attr(&evsel->core.attr); + if (evsel__is_tool(evsel)) { + return evsel__tool_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } + if (evsel__is_hwmon(evsel)) { + return evsel__hwmon_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { for (thread = 0; thread < nthreads; thread++) { @@ -2304,46 +2486,6 @@ retry_open: if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - if (evsel__tool_event(evsel) == PERF_TOOL_USER_TIME || - evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME) { - bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - __u64 *start_time = NULL; - - if (evsel->core.attr.sample_period) { - /* no sampling */ - err = -EINVAL; - goto out_close; - } - if (pid > -1) { - char buf[64]; - - snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); - fd = open(buf, O_RDONLY); - evsel->pid_stat = true; - } else { - fd = open("/proc/stat", O_RDONLY); - } - FD(evsel, idx, thread) = fd; - if (fd < 0) { - err = -errno; - goto out_close; - } - start_time = xyarray__entry(evsel->start_times, idx, thread); - if (pid > -1) { - err = read_pid_stat_field(fd, system ? 15 : 14, - start_time); - } else { - struct perf_cpu cpu; - - cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); - err = read_stat_field(fd, cpu, system ? 3 : 1, - start_time); - } - if (err) - goto out_close; - continue; - } - group_fd = get_group_fd(evsel, idx, thread); if (group_fd == -2) { @@ -2352,8 +2494,6 @@ retry_open: goto out_close; } - test_attr__ready(); - /* Debug message used by test scripts */ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); @@ -2374,7 +2514,7 @@ retry_open: bpf_counter__install_pe(evsel, idx, fd); - if (unlikely(test_attr__enabled)) { + if (unlikely(test_attr__enabled())) { test_attr__open(&evsel->core.attr, pid, perf_cpu_map__cpu(cpus, idx), fd, group_fd, evsel->open_flags); @@ -2415,9 +2555,6 @@ retry_open: return 0; try_fallback: - if (evsel__precise_ip_fallback(evsel)) - goto retry_open; - if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ @@ -2434,11 +2571,15 @@ try_fallback: if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit)) goto retry_open; - if (err != -EINVAL || idx > 0 || thread > 0) - goto out_close; - - if (evsel__detect_missing_features(evsel)) + if (err == -EINVAL && evsel__detect_missing_features(evsel)) goto fallback_missing_features; + + if (evsel__precise_ip_fallback(evsel)) + goto retry_open; + + if (evsel__handle_error_quirks(evsel, err)) + goto retry_open; + out_close: if (err) threads->err_thread = thread; @@ -3245,6 +3386,27 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, evsel->core.attr.exclude_hv = 1; return true; + } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && + !evsel->exclude_GH) { + const char *name = evsel__name(evsel); + char *new_name; + const char *sep = ":"; + + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + (strchr(name, ':') && !evsel->is_libpfm_event)) + sep = ""; + + if (asprintf(&new_name, "%s%sH", name, sep) < 0) + return false; + + free(evsel->name); + evsel->name = new_name; + /* Apple M1 requires exclude_guest */ + scnprintf(msg, msgsize, "trying to fall back to excluding guest samples"); + evsel->core.attr.exclude_guest = 1; + + return true; } return false; @@ -3415,7 +3577,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg | grep -i perf may provide additional information.\n", + "\"dmesg | grep -i perf\" may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 15e745a9a798..04934a7af174 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,6 +11,7 @@ #include <perf/evsel.h> #include "symbol_conf.h" #include "pmus.h" +#include "pmu.h" struct bpf_object; struct cgroup; @@ -22,25 +23,9 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; -struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); -enum perf_tool_event { - PERF_TOOL_NONE = 0, - PERF_TOOL_DURATION_TIME = 1, - PERF_TOOL_USER_TIME = 2, - PERF_TOOL_SYSTEM_TIME = 3, - - PERF_TOOL_MAX, -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev); -enum perf_tool_event perf_tool_event__from_str(const char *str); - -#define perf_tool_event__for_each_event(ev) \ - for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++) - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -72,7 +57,6 @@ struct evsel { struct { char *name; char *group_name; - const char *pmu_name; const char *group_pmu_name; #ifdef HAVE_LIBTRACEEVENT struct tep_event *tp_format; @@ -83,7 +67,6 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; - enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -102,6 +85,7 @@ struct evsel { int bpf_fd; struct bpf_object *bpf_obj; struct list_head config_terms; + u64 alternate_hw_config; }; /* @@ -183,7 +167,7 @@ struct evsel { unsigned long open_flags; int precise_ip_original; - /* for missing_features */ + /* The PMU the event is from. Used for missing_features, PMU name, etc. */ struct perf_pmu *pmu; /* For tool events */ @@ -221,6 +205,7 @@ struct perf_missing_features { bool weight_struct; bool read_lost; bool branch_counters; + bool inherit_sample_read; }; extern struct perf_missing_features perf_missing_features; @@ -320,21 +305,11 @@ const char *evsel__name(struct evsel *evsel); bool evsel__name_is(struct evsel *evsel, const char *name); const char *evsel__metric_id(const struct evsel *evsel); -static inline bool evsel__is_tool(const struct evsel *evsel) -{ - return evsel->tool_event != PERF_TOOL_NONE; -} - static inline bool evsel__is_retire_lat(const struct evsel *evsel) { return evsel->retire_lat; } -static inline enum perf_tool_event evsel__tool_event(const struct evsel *evsel) -{ - return evsel->tool_event; -} - const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); @@ -368,7 +343,6 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel); int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -bool evsel__detect_missing_features(struct evsel *evsel); bool evsel__precise_ip_fallback(struct evsel *evsel); @@ -393,26 +367,10 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name); -static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) -{ - if (evsel->core.attr.type != type) - return false; - - if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && - perf_pmus__supports_extended_type()) - return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config; - - return evsel->core.attr.config == config; -} +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config); #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c) -static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) -{ - return (e1->core.attr.type == e2->core.attr.type) && - (e1->core.attr.config == e2->core.attr.config); -} - int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index c2c0500d5da9..86b7f46f9e2a 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -14,7 +14,7 @@ #include "dso.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b2536a59c44e..f289044a1f7c 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,25 +5,22 @@ #include <stdlib.h> #include <string.h> #include "metricgroup.h" -#include "cpumap.h" -#include "cputopo.h" #include "debug.h" #include "evlist.h" #include "expr.h" +#include "smt.h" +#include "tool_pmu.h" #include <util/expr-bison.h> #include <util/expr-flex.h> #include "util/hashmap.h" #include "util/header.h" #include "util/pmu.h" -#include "smt.h" -#include "tsc.h" -#include <api/fs/fs.h> +#include <perf/cpumap.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> #include <math.h> -#include "pmu.h" struct expr_id_data { union { @@ -393,90 +390,26 @@ double expr_id_data__source_count(const struct expr_id_data *data) return data->val.source_count; } -#if !defined(__i386__) && !defined(__x86_64__) -double arch_get_tsc_freq(void) -{ - return 0.0; -} -#endif - -static double has_pmem(void) -{ - static bool has_pmem, cached; - const char *sysfs = sysfs__mountpoint(); - char path[PATH_MAX]; - - if (!cached) { - snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); - has_pmem = access(path, F_OK) == 0; - cached = true; - } - return has_pmem ? 1.0 : 0.0; -} - double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx) { - const struct cpu_topology *topology; double result = NAN; + enum tool_pmu_event ev = tool_pmu__str_to_event(literal + 1); - if (!strcmp("#num_cpus", literal)) { - result = cpu__max_present_cpu().cpu; - goto out; - } - if (!strcmp("#num_cpus_online", literal)) { - struct perf_cpu_map *online = cpu_map__online(); - - if (online) - result = perf_cpu_map__nr(online); - goto out; - } + if (ev != TOOL_PMU__EVENT_NONE) { + u64 count; - if (!strcasecmp("#system_tsc_freq", literal)) { - result = arch_get_tsc_freq(); - goto out; - } + if (tool_pmu__read_event(ev, &count)) + result = count; + else + pr_err("Failure to read '%s'", literal); - /* - * Assume that topology strings are consistent, such as CPUs "0-1" - * wouldn't be listed as "0,1", and so after deduplication the number of - * these strings gives an indication of the number of packages, dies, - * etc. - */ - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#core_wide", literal)) { + } else if (!strcmp("#core_wide", literal)) { result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list) ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#num_packages", literal)) { - topology = online_topology(); - result = topology->package_cpus_lists; - goto out; - } - if (!strcmp("#num_dies", literal)) { - topology = online_topology(); - result = topology->die_cpus_lists; - goto out; - } - if (!strcmp("#num_cores", literal)) { - topology = online_topology(); - result = topology->core_cpus_lists; - goto out; - } - if (!strcmp("#slots", literal)) { - result = perf_pmu__cpu_slots_per_cycle(); - goto out; - } - if (!strcmp("#has_pmem", literal)) { - result = has_pmem(); - goto out; + } else { + pr_err("Unrecognized literal '%s'", literal); } - pr_err("Unrecognized literal '%s'", literal); -out: pr_debug2("literal: %s = %f\n", literal, result); return result; } @@ -523,8 +456,8 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, const char *test_id) { double ret; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); if (!cpuid) return NAN; diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c8f6bee1fa61..cdce7f173d00 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -16,7 +16,7 @@ #include <inttypes.h> #include <fcntl.h> #include <err.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include <dwarf.h> #endif @@ -499,7 +499,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 4e2e4f40e134..9f0b875d6548 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -8,7 +8,7 @@ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #endif diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a6386d12afd7..3451e542b69a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -58,7 +58,7 @@ #include <internal/lib.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif /* @@ -819,11 +819,31 @@ static int write_group_desc(struct feat_fd *ff, * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char * __weak get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return NULL; } +char *get_cpuid_allow_env_override(struct perf_cpu cpu) +{ + char *cpuid; + static bool printed; + + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(cpu); + if (!cpuid) + return NULL; + + if (!printed) { + pr_debug("Using CPUID %s\n", cpuid); + printed = true; + } + return cpuid; +} + /* Return zero when the cpuid from the mapfile.csv matches the * cpuid string generated on this platform. * Otherwise return non-zero. @@ -856,18 +876,19 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(SRCARCH)/util/header.c */ -int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) +int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused, + struct perf_cpu cpu __maybe_unused) { return ENOSYS; /* Not implemented */ } -static int write_cpuid(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_cpuid(struct feat_fd *ff, struct evlist *evlist) { + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); char buffer[64]; int ret; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) return -1; @@ -987,57 +1008,6 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } -/* - * Check whether a CPU is online - * - * Returns: - * 1 -> if CPU is online - * 0 -> if CPU is offline - * -1 -> error case - */ -int is_cpu_online(unsigned int cpu) -{ - char *str; - size_t strlen; - char buf[256]; - int status = -1; - struct stat statbuf; - - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d", cpu); - if (stat(buf, &statbuf) != 0) - return 0; - - /* - * Check if /sys/devices/system/cpu/cpux/online file - * exists. Some cases cpu0 won't have online file since - * it is not expected to be turned off generally. - * In kernels without CONFIG_HOTPLUG_CPU, this - * file won't exist - */ - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d/online", cpu); - if (stat(buf, &statbuf) != 0) - return 1; - - /* - * Read online file using sysfs__read_str. - * If read or open fails, return -1. - * If read succeeds, return value from file - * which gets stored in "str" - */ - snprintf(buf, sizeof(buf), - "devices/system/cpu/cpu%d/online", cpu); - - if (sysfs__read_str(buf, &str, &strlen) < 0) - return status; - - status = atoi(str); - - free(str); - return status; -} - #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a63a361f20f4..5201af6305f4 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -10,7 +10,13 @@ #include <linux/bitmap.h> #include <linux/types.h> #include "env.h" -#include "pmu.h" +#include <perf/cpumap.h> + +struct evlist; +union perf_event; +struct perf_header; +struct perf_session; +struct perf_tool; enum { HEADER_RESERVED = 0, /* always cleared */ @@ -91,8 +97,6 @@ struct perf_pipe_file_header { u64 size; }; -struct perf_header; - int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); @@ -124,11 +128,6 @@ struct perf_header_feature_ops { bool synthesize; }; -struct evlist; -struct perf_session; -struct perf_tool; -union perf_event; - extern const char perf_version_string[]; int perf_session__read_header(struct perf_session *session); @@ -196,14 +195,16 @@ int write_padded(struct feat_fd *fd, const void *bf, #define MAX_CACHE_LVL 4 -int is_cpu_online(unsigned int cpu); int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp); /* * arch specific callback */ -int get_cpuid(char *buffer, size_t sz); +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu); + +char *get_cpuid_str(struct perf_cpu cpu); + +char *get_cpuid_allow_env_override(struct perf_cpu cpu); -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f387e85a0087..fff134565801 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -218,6 +218,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 7d7ae94b4b31..1131056924d9 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -87,6 +87,9 @@ enum hist_column { HISTC_TYPE_OFFSET, HISTC_SYMBOL_OFFSET, HISTC_TYPE_CACHELINE, + HISTC_CALLCHAIN_BRANCH_PREDICTED, + HISTC_CALLCHAIN_BRANCH_ABORT, + HISTC_CALLCHAIN_BRANCH_CYCLES, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c new file mode 100644 index 000000000000..4acb9bb19b84 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.c @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "hashmap.h" +#include "hwmon_pmu.h" +#include "pmu.h" +#include <internal/xyarray.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <sys/types.h> +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <api/fs/fs.h> +#include <api/io.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +/** Strings that correspond to enum hwmon_type. */ +static const char * const hwmon_type_strs[HWMON_TYPE_MAX] = { + NULL, + "cpu", + "curr", + "energy", + "fan", + "humidity", + "in", + "intrusion", + "power", + "pwm", + "temp", +}; +#define LONGEST_HWMON_TYPE_STR "intrusion" + +/** Strings that correspond to enum hwmon_item. */ +static const char * const hwmon_item_strs[HWMON_ITEM__MAX] = { + NULL, + "accuracy", + "alarm", + "auto_channels_temp", + "average", + "average_highest", + "average_interval", + "average_interval_max", + "average_interval_min", + "average_lowest", + "average_max", + "average_min", + "beep", + "cap", + "cap_hyst", + "cap_max", + "cap_min", + "crit", + "crit_hyst", + "div", + "emergency", + "emergency_hist", + "enable", + "fault", + "freq", + "highest", + "input", + "label", + "lcrit", + "lcrit_hyst", + "lowest", + "max", + "max_hyst", + "min", + "min_hyst", + "mod", + "offset", + "pulses", + "rated_max", + "rated_min", + "reset_history", + "target", + "type", + "vid", +}; +#define LONGEST_HWMON_ITEM_STR "average_interval_max" + +static const char *const hwmon_units[HWMON_TYPE_MAX] = { + NULL, + "V", /* cpu */ + "A", /* curr */ + "J", /* energy */ + "rpm", /* fan */ + "%", /* humidity */ + "V", /* in */ + "", /* intrusion */ + "W", /* power */ + "Hz", /* pwm */ + "'C", /* temp */ +}; + +struct hwmon_pmu { + struct perf_pmu pmu; + struct hashmap events; + int hwmon_dir_fd; +}; + +/** + * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key + * represents an event. + * + * Related hwmon files start <type><number> that this key represents. + */ +union hwmon_pmu_event_key { + long type_and_num; + struct { + int num :16; + enum hwmon_type type :8; + }; +}; + +/** + * struct hwmon_pmu_event_value: Value in hwmon_pmu->events. + * + * Hwmon files are of the form <type><number>_<item> and may have a suffix + * _alarm. + */ +struct hwmon_pmu_event_value { + /** @items: which item files are present. */ + DECLARE_BITMAP(items, HWMON_ITEM__MAX); + /** @alarm_items: which item files are present. */ + DECLARE_BITMAP(alarm_items, HWMON_ITEM__MAX); + /** @label: contents of <type><number>_label if present. */ + char *label; + /** @name: name computed from label of the form <type>_<label>. */ + char *name; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_HWMON_START && + pmu->type <= PERF_PMU_TYPE_HWMON_END; +} + +bool evsel__is_hwmon(const struct evsel *evsel) +{ + return perf_pmu__is_hwmon(evsel->pmu); +} + +static size_t hwmon_pmu__event_hashmap_hash(long key, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key).type_and_num; +} + +static bool hwmon_pmu__event_hashmap_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key1).type_and_num == + ((union hwmon_pmu_event_key)key2).type_and_num; +} + +static int hwmon_strcmp(const void *a, const void *b) +{ + const char *sa = a; + const char * const *sb = b; + + return strcmp(sa, *sb); +} + +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm) +{ + char fn_type[24]; + const char **elem; + const char *fn_item = NULL; + size_t fn_item_len; + + assert(strlen(LONGEST_HWMON_TYPE_STR) < sizeof(fn_type)); + strlcpy(fn_type, filename, sizeof(fn_type)); + for (size_t i = 0; fn_type[i] != '\0'; i++) { + if (fn_type[i] >= '0' && fn_type[i] <= '9') { + fn_type[i] = '\0'; + *number = strtoul(&filename[i], (char **)&fn_item, 10); + if (*fn_item == '_') + fn_item++; + break; + } + if (fn_type[i] == '_') { + fn_type[i] = '\0'; + *number = -1; + fn_item = &filename[i + 1]; + break; + } + } + if (fn_item == NULL || fn_type[0] == '\0' || (item != NULL && fn_item[0] == '\0')) { + pr_debug3("hwmon_pmu: not a hwmon file '%s'\n", filename); + return false; + } + elem = bsearch(&fn_type, hwmon_type_strs + 1, ARRAY_SIZE(hwmon_type_strs) - 1, + sizeof(hwmon_type_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon type '%s' in file name '%s'\n", + fn_type, filename); + return false; + } + + *type = elem - &hwmon_type_strs[0]; + if (!item) + return true; + + *alarm = false; + fn_item_len = strlen(fn_item); + if (fn_item_len > 6 && !strcmp(&fn_item[fn_item_len - 6], "_alarm")) { + assert(strlen(LONGEST_HWMON_ITEM_STR) < sizeof(fn_type)); + strlcpy(fn_type, fn_item, fn_item_len - 5); + fn_item = fn_type; + *alarm = true; + } + elem = bsearch(fn_item, hwmon_item_strs + 1, ARRAY_SIZE(hwmon_item_strs) - 1, + sizeof(hwmon_item_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n", + fn_item, filename); + return false; + } + *item = elem - &hwmon_item_strs[0]; + return true; +} + +static void fix_name(char *p) +{ + char *s = strchr(p, '\n'); + + if (s) + *s = '\0'; + + while (*p != '\0') { + if (strchr(" :,/\n\t", *p)) + *p = '_'; + else + *p = tolower(*p); + p++; + } +} + +static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) +{ + DIR *dir; + struct dirent *ent; + int dup_fd, err = 0; + struct hashmap_entry *cur, *tmp; + size_t bkt; + + if (pmu->pmu.sysfs_aliases_loaded) + return 0; + + /* + * Use a dup-ed fd as closedir will close it. Use openat so that the + * directory contents are refreshed. + */ + dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY); + + if (dup_fd == -1) + return -ENOMEM; + + dir = fdopendir(dup_fd); + if (!dir) { + close(dup_fd); + return -ENOMEM; + } + + while ((ent = readdir(dir)) != NULL) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hwmon_pmu_event_value *value; + + if (ent->d_type != DT_REG) + continue; + + if (!parse_hwmon_filename(ent->d_name, &type, &number, &item, &alarm)) { + pr_debug3("Not a hwmon file '%s'\n", ent->d_name); + continue; + } + key.num = number; + key.type = type; + if (!hashmap__find(&pmu->events, key.type_and_num, &value)) { + value = zalloc(sizeof(*value)); + if (!value) { + err = -ENOMEM; + goto err_out; + } + err = hashmap__add(&pmu->events, key.type_and_num, value); + if (err) { + free(value); + err = -ENOMEM; + goto err_out; + } + } + __set_bit(item, alarm ? value->alarm_items : value->items); + if (item == HWMON_ITEM_LABEL) { + char buf[128]; + int fd = openat(pmu->hwmon_dir_fd, ent->d_name, O_RDONLY); + ssize_t read_len; + + if (fd < 0) + continue; + + read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) + buf[read_len] = '\0'; + + if (buf[0] == '\0') { + pr_debug("hwmon_pmu: empty label file %s %s\n", + pmu->pmu.name, ent->d_name); + close(fd); + continue; + } + value->label = strdup(buf); + if (!value->label) { + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + continue; + } + snprintf(buf, sizeof(buf), "%s_%s", hwmon_type_strs[type], value->label); + fix_name(buf); + value->name = strdup(buf); + if (!value->name) + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + } + } + if (hashmap__size(&pmu->events) == 0) + pr_debug2("hwmon_pmu: %s has no events\n", pmu->pmu.name); + + hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (!test_bit(HWMON_ITEM_INPUT, value->items)) { + pr_debug("hwmon_pmu: %s removing event '%s%d' that has no input file\n", + pmu->pmu.name, hwmon_type_strs[key.type], key.num); + hashmap__delete(&pmu->events, key.type_and_num, &key, &value); + zfree(&value->label); + zfree(&value->name); + free(value); + } + } + pmu->pmu.sysfs_aliases_loaded = true; + +err_out: + closedir(dir); + return err; +} + +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const char *sysfs_name, const char *name) +{ + char buf[32]; + struct hwmon_pmu *hwm; + + hwm = zalloc(sizeof(*hwm)); + if (!hwm) + return NULL; + + hwm->hwmon_dir_fd = hwmon_dir; + hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + goto err_out; + } + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); + hwm->pmu.name = strdup(buf); + if (!hwm->pmu.name) + goto err_out; + hwm->pmu.alias_name = strdup(sysfs_name); + if (!hwm->pmu.alias_name) + goto err_out; + hwm->pmu.cpus = perf_cpu_map__new("0"); + if (!hwm->pmu.cpus) + goto err_out; + INIT_LIST_HEAD(&hwm->pmu.format); + INIT_LIST_HEAD(&hwm->pmu.aliases); + INIT_LIST_HEAD(&hwm->pmu.caps); + hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, + hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); + + list_add_tail(&hwm->pmu.list, pmus); + return &hwm->pmu; +err_out: + free((char *)hwm->pmu.name); + free(hwm->pmu.alias_name); + free(hwm); + close(hwmon_dir); + return NULL; +} + +void hwmon_pmu__exit(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur, *tmp; + size_t bkt; + + hashmap__for_each_entry_safe((&hwm->events), cur, tmp, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + zfree(&value->label); + zfree(&value->name); + free(value); + } + hashmap__clear(&hwm->events); + close(hwm->hwmon_dir_fd); +} + +static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len, + union hwmon_pmu_event_key key, + const unsigned long *items, bool is_alarm) +{ + size_t bit; + char buf[64]; + size_t len = 0; + + for_each_set_bit(bit, items, HWMON_ITEM__MAX) { + int fd; + + if (bit == HWMON_ITEM_LABEL || bit == HWMON_ITEM_INPUT) + continue; + + snprintf(buf, sizeof(buf), "%s%d_%s%s", + hwmon_type_strs[key.type], + key.num, + hwmon_item_strs[bit], + is_alarm ? "_alarm" : ""); + fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + if (fd > 0) { + ssize_t read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) { + long long val; + + buf[read_len] = '\0'; + val = strtoll(buf, /*endptr=*/NULL, 10); + len += snprintf(out_buf + len, out_buf_len - len, "%s%s%s=%g%s", + len == 0 ? " " : ", ", + hwmon_item_strs[bit], + is_alarm ? "_alarm" : "", + (double)val / 1000.0, + hwmon_units[key.type]); + } + close(fd); + } + } + return len; +} + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur; + size_t bkt; + + if (hwmon_pmu__read_events(hwm)) + return false; + + hashmap__for_each_entry((&hwm->events), cur, bkt) { + static const char *const hwmon_scale_units[HWMON_TYPE_MAX] = { + NULL, + "0.001V", /* cpu */ + "0.001A", /* curr */ + "0.001J", /* energy */ + "1rpm", /* fan */ + "0.001%", /* humidity */ + "0.001V", /* in */ + NULL, /* intrusion */ + "0.001W", /* power */ + "1Hz", /* pwm */ + "0.001'C", /* temp */ + }; + static const char *const hwmon_desc[HWMON_TYPE_MAX] = { + NULL, + "CPU core reference voltage", /* cpu */ + "Current", /* curr */ + "Cumulative energy use", /* energy */ + "Fan", /* fan */ + "Humidity", /* humidity */ + "Voltage", /* in */ + "Chassis intrusion detection", /* intrusion */ + "Power use", /* power */ + "Pulse width modulation fan control", /* pwm */ + "Temperature", /* temp */ + }; + char alias_buf[64]; + char desc_buf[256]; + char encoding_buf[128]; + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + struct pmu_event_info info = { + .pmu = pmu, + .name = value->name, + .alias = alias_buf, + .scale_unit = hwmon_scale_units[key.type], + .desc = desc_buf, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "hwmon", + .pmu_name = pmu->name, + .event_type_desc = "Hwmon event", + }; + int ret; + size_t len; + + len = snprintf(alias_buf, sizeof(alias_buf), "%s%d", + hwmon_type_strs[key.type], key.num); + if (!info.name) { + info.name = info.alias; + info.alias = NULL; + } + + len = snprintf(desc_buf, sizeof(desc_buf), "%s in unit %s named %s.", + hwmon_desc[key.type], + pmu->name + 6, + value->label ?: info.name); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->items, /*is_alarm=*/false); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->alarm_items, /*is_alarm=*/true); + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%lx/", + pmu->name, cur->key); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t hwmon_pmu__num_events(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + + hwmon_pmu__read_events(hwm); + return hashmap__size(&hwm->events); +} + +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + enum hwmon_type type; + int number; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hashmap_entry *cur; + size_t bkt; + + if (!parse_hwmon_filename(name, &type, &number, /*item=*/NULL, /*is_alarm=*/NULL)) + return false; + + if (hwmon_pmu__read_events(hwm)) + return false; + + key.type = type; + key.num = number; + if (hashmap_find(&hwm->events, key.type_and_num, /*value=*/NULL)) + return true; + if (key.num != -1) + return false; + /* Item is of form <type>_ which means we should match <type>_<label>. */ + hashmap__for_each_entry((&hwm->events), cur, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + key.type_and_num = cur->key; + if (key.type == type && value->name && !strcasecmp(name, value->name)) + return true; + } + return false; +} + +static int hwmon_pmu__config_term(const struct hwmon_pmu *hwm, + struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + if (number == -1) { + /* + * Item is of form <type>_ which means we should + * match <type>_<label>. + */ + struct hashmap_entry *cur; + size_t bkt; + + attr->config = 0; + hashmap__for_each_entry((&hwm->events), cur, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (key.type == type && value->name && + !strcasecmp(term->config, value->name)) { + attr->config = key.type_and_num; + break; + } + } + if (attr->config == 0) + return -EINVAL; + } else { + union hwmon_pmu_event_key key = { + .type_and_num = 0, + }; + + key.type = type; + key.num = number; + attr->config = key.type_and_num; + } + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct parse_events_term *term; + int ret; + + ret = hwmon_pmu__read_events(hwm); + if (ret) + return ret; + + list_for_each_entry(term, &terms->terms, list) { + if (hwmon_pmu__config_term(hwm, attr, term, err)) + return -EINVAL; + } + + return 0; + +} + +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err) +{ + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + info->unit = hwmon_units[type]; + if (type == HWMON_TYPE_FAN || type == HWMON_TYPE_PWM || + type == HWMON_TYPE_INTRUSION) + info->scale = 1; + else + info->scale = 0.001; + } + return 0; + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus) +{ + char *line = NULL; + DIR *class_hwmon_dir; + struct dirent *class_hwmon_ent; + char buf[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs); + class_hwmon_dir = opendir(buf); + if (!class_hwmon_dir) + return 0; + + while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) { + size_t line_len; + int hwmon_dir, name_fd; + struct io io; + + if (class_hwmon_ent->d_type != DT_LNK) + continue; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/%s", sysfs, class_hwmon_ent->d_name); + hwmon_dir = open(buf, O_DIRECTORY); + if (hwmon_dir == -1) { + pr_debug("hwmon_pmu: not a directory: '%s/class/hwmon/%s'\n", + sysfs, class_hwmon_ent->d_name); + continue; + } + name_fd = openat(hwmon_dir, "name", O_RDONLY); + if (name_fd == -1) { + pr_debug("hwmon_pmu: failure to open '%s/class/hwmon/%s/name'\n", + sysfs, class_hwmon_ent->d_name); + close(hwmon_dir); + continue; + } + io__init(&io, name_fd, buf, sizeof(buf)); + io__getline(&io, &line, &line_len); + if (line_len > 0 && line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + hwmon_pmu__new(pmus, hwmon_dir, class_hwmon_ent->d_name, line); + close(name_fd); + } + free(line); + closedir(class_hwmon_dir); + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + struct hwmon_pmu *hwm = container_of(evsel->pmu, struct hwmon_pmu, pmu); + union hwmon_pmu_event_key key = { + .type_and_num = evsel->core.attr.config, + }; + int idx = 0, thread = 0, nthreads, err = 0; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + char buf[64]; + int fd; + + snprintf(buf, sizeof(buf), "%s%d_input", + hwmon_type_strs[key.type], key.num); + + fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + return err; +} + +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + char buf[32]; + int fd; + ssize_t len; + struct perf_counts_values *count, *old_count = NULL; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + len = pread(fd, buf, sizeof(buf), 0); + if (len <= 0) { + count->lost++; + return -EINVAL; + } + buf[len] = '\0'; + if (old_count) { + count->val = old_count->val + strtoll(buf, NULL, 10); + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = strtoll(buf, NULL, 10); + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h new file mode 100644 index 000000000000..882566846df4 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __HWMON_PMU_H +#define __HWMON_PMU_H + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +/** + * enum hwmon_type: + * + * As described in Documentation/hwmon/sysfs-interface.rst hwmon events are + * defined over multiple files of the form <type><num>_<item>. This enum + * captures potential <type> values. + * + * This enum is exposed for testing. + */ +enum hwmon_type { + HWMON_TYPE_NONE, + + HWMON_TYPE_CPU, + HWMON_TYPE_CURR, + HWMON_TYPE_ENERGY, + HWMON_TYPE_FAN, + HWMON_TYPE_HUMIDITY, + HWMON_TYPE_IN, + HWMON_TYPE_INTRUSION, + HWMON_TYPE_POWER, + HWMON_TYPE_PWM, + HWMON_TYPE_TEMP, + + HWMON_TYPE_MAX +}; + +/** + * enum hwmon_item: + * + * Similar to enum hwmon_type but describes the item part of a a sysfs filename. + * + * This enum is exposed for testing. + */ +enum hwmon_item { + HWMON_ITEM_NONE, + + HWMON_ITEM_ACCURACY, + HWMON_ITEM_ALARM, + HWMON_ITEM_AUTO_CHANNELS_TEMP, + HWMON_ITEM_AVERAGE, + HWMON_ITEM_AVERAGE_HIGHEST, + HWMON_ITEM_AVERAGE_INTERVAL, + HWMON_ITEM_AVERAGE_INTERVAL_MAX, + HWMON_ITEM_AVERAGE_INTERVAL_MIN, + HWMON_ITEM_AVERAGE_LOWEST, + HWMON_ITEM_AVERAGE_MAX, + HWMON_ITEM_AVERAGE_MIN, + HWMON_ITEM_BEEP, + HWMON_ITEM_CAP, + HWMON_ITEM_CAP_HYST, + HWMON_ITEM_CAP_MAX, + HWMON_ITEM_CAP_MIN, + HWMON_ITEM_CRIT, + HWMON_ITEM_CRIT_HYST, + HWMON_ITEM_DIV, + HWMON_ITEM_EMERGENCY, + HWMON_ITEM_EMERGENCY_HIST, + HWMON_ITEM_ENABLE, + HWMON_ITEM_FAULT, + HWMON_ITEM_FREQ, + HWMON_ITEM_HIGHEST, + HWMON_ITEM_INPUT, + HWMON_ITEM_LABEL, + HWMON_ITEM_LCRIT, + HWMON_ITEM_LCRIT_HYST, + HWMON_ITEM_LOWEST, + HWMON_ITEM_MAX, + HWMON_ITEM_MAX_HYST, + HWMON_ITEM_MIN, + HWMON_ITEM_MIN_HYST, + HWMON_ITEM_MOD, + HWMON_ITEM_OFFSET, + HWMON_ITEM_PULSES, + HWMON_ITEM_RATED_MAX, + HWMON_ITEM_RATED_MIN, + HWMON_ITEM_RESET_HISTORY, + HWMON_ITEM_TARGET, + HWMON_ITEM_TYPE, + HWMON_ITEM_VID, + + HWMON_ITEM__MAX, +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu); +bool evsel__is_hwmon(const struct evsel *evsel); + +/** + * parse_hwmon_filename() - Parse filename into constituent parts. + * + * @filename: To be parsed, of the form <type><number>_<item>. + * @type: The type defined from the parsed file name. + * @number: The number of the type, for example there may be more than 1 fan. + * @item: A hwmon <type><number> may have multiple associated items. + * @alarm: Is the filename for an alarm value? + * + * An example of a hwmon filename is "temp1_input". The type is temp for a + * temperature value. The number is 1. The item within the file is an input + * value - the temperature itself. This file doesn't contain an alarm value. + * + * Exposed for testing. + */ +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm); + +/** + * hwmon_pmu__new() - Allocate and construct a hwmon PMU. + * + * @pmus: The list of PMUs to be added to. + * @hwmon_dir: An O_DIRECTORY file descriptor for a hwmon directory. + * @sysfs_name: Name of the hwmon sysfs directory like hwmon0. + * @name: The contents of the "name" file in the hwmon directory. + * + * Exposed for testing. Regular construction should happen via + * perf_pmus__read_hwmon_pmus. + */ +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, + const char *sysfs_name, const char *name); +void hwmon_pmu__exit(struct perf_pmu *pmu); + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t hwmon_pmu__num_events(struct perf_pmu *pmu); +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name); +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err); + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus); + + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __HWMON_PMU_H */ diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 75b28dcc8317..6f1b9f6b2466 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -2,52 +2,132 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ #include "annotate.h" +#include <elf.h> + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + +#ifndef EM_CSKY +#define EM_CSKY 252 /* C-SKY */ +#endif +#ifndef EF_CSKY_ABIV1 +#define EF_CSKY_ABIV1 0X10000000 +#endif +#ifndef EF_CSKY_ABIV2 +#define EF_CSKY_ABIV2 0X20000000 +#endif + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + +/* EM_HOST gives the ELF machine for host, EF_HOST gives additional flags. */ +#if defined(__x86_64__) + #define EM_HOST EM_X86_64 +#elif defined(__i386__) + #define EM_HOST EM_386 +#elif defined(__aarch64__) + #define EM_HOST EM_AARCH64 +#elif defined(__arm__) + #define EM_HOST EM_ARM +#elif defined(__alpha__) + #define EM_HOST EM_ALPHA +#elif defined(__arc__) + #define EM_HOST EM_ARC +#elif defined(__AVR__) + #define EM_HOST EM_AVR +#elif defined(__AVR32__) + #define EM_HOST EM_AVR32 +#elif defined(__bfin__) + #define EM_HOST EM_BLACKFIN +#elif defined(__csky__) + #define EM_HOST EM_CSKY + #if defined(__CSKYABIV2__) + #define EF_HOST EF_CSKY_ABIV2 + #else + #define EF_HOST EF_CSKY_ABIV1 + #endif +#elif defined(__cris__) + #define EM_HOST EM_CRIS +#elif defined(__hppa__) // HP PA-RISC + #define EM_HOST EM_PARISC +#elif defined(__loongarch__) + #define EM_HOST EM_LOONGARCH +#elif defined(__mips__) + #define EM_HOST EM_MIPS +#elif defined(__m32r__) + #define EM_HOST EM_M32R +#elif defined(__microblaze__) + #define EM_HOST EM_MICROBLAZE +#elif defined(__MSP430__) + #define EM_HOST EM_MSP430 +#elif defined(__powerpc64__) + #define EM_HOST EM_PPC64 +#elif defined(__powerpc__) + #define EM_HOST EM_PPC +#elif defined(__riscv) + #define EM_HOST EM_RISCV +#elif defined(__s390x__) + #define EM_HOST EM_S390 +#elif defined(__sh__) + #define EM_HOST EM_SH +#elif defined(__sparc64__) || defined(__sparc__) + #define EM_HOST EM_SPARC +#elif defined(__xtensa__) + #define EM_HOST EM_XTENSA +#else + /* Unknown host ELF machine type. */ + #define EM_HOST EM_NONE +#endif + +#if !defined(EF_HOST) + #define EF_HOST 0 +#endif #define DWARF_REG_PC 0xd3af9c /* random number */ #define DWARF_REG_FB 0xd3affb /* random number */ -#ifdef HAVE_DWARF_SUPPORT -const char *get_arch_regstr(unsigned int n); -/* - * get_dwarf_regstr - Returns ftrace register string from DWARF regnum - * n: DWARF register number - * machine: ELF machine signature (EM_*) +#ifdef HAVE_LIBDW_SUPPORT +const char *get_csky_regstr(unsigned int n, unsigned int flags); + +/** + * get_dwarf_regstr() - Returns ftrace register string from DWARF regnum. + * @n: DWARF register number. + * @machine: ELF machine signature (EM_*). + * @flags: ELF flags for things like ABI differences. */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine); +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags); +int get_x86_regnum(const char *name); + +#if !defined(__x86_64__) && !defined(__i386__) int get_arch_regnum(const char *name); +#endif + /* * get_dwarf_regnum - Returns DWARF regnum from register name * name: architecture register name * machine: ELF machine signature (EM_*) */ -int get_dwarf_regnum(const char *name, unsigned int machine); +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags); + +void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline int get_dwarf_regnum(const char *name __maybe_unused, - unsigned int machine __maybe_unused) + unsigned int machine __maybe_unused, + unsigned int flags __maybe_unused) { return -1; } -#endif -#if !defined(__powerpc__) || !defined(HAVE_DWARF_SUPPORT) static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused, struct annotated_op_loc *op_loc __maybe_unused) { return; } -#else -void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); #endif -#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -/* - * Arch should support fetching the offset of a register in pt_regs - * by its name. See kernel's regs_query_register_offset in - * arch/xxx/kernel/ptrace.c. - */ -int regs_query_register_offset(const char *name); -#endif #endif diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 27d9b5c9fec8..a7c589fecb98 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -100,7 +100,7 @@ static void intel_bts_dump(struct intel_bts *bts __maybe_unused, else sz = len; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < sz; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < br_sz; i++) @@ -808,7 +808,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, static const char * const intel_bts_info_fmts[] = { [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index fd2597613f3d..30be6dfe09eb 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -249,7 +249,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -4110,7 +4110,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fad227b625d1..27d5345d2b30 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -134,6 +134,8 @@ struct machine *machine__new_host(void) if (machine__create_kernel_maps(machine) < 0) goto out_delete; + + machine->env = &perf_env; } return machine; @@ -1343,7 +1345,7 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo * we need to update the symtab_type if needed. */ if (m->comp && is_kmod_dso(dso)) { - dso__set_symtab_type(dso, dso__symtab_type(dso)); + dso__set_symtab_type(dso, dso__symtab_type(dso)+1); dso__set_comp(dso, m->comp); } map__put(map); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 051feb93ed8d..bf5090f5220b 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -366,6 +366,12 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_L1] = "L1", + [PERF_MEM_LVLNUM_L2] = "L2", + [PERF_MEM_LVLNUM_L3] = "L3", + [PERF_MEM_LVLNUM_L4] = "L4", + [PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB", + [PERF_MEM_LVLNUM_MSC] = "Memory-side Cache", [PERF_MEM_LVLNUM_UNC] = "Uncached", [PERF_MEM_LVLNUM_CXL] = "CXL", [PERF_MEM_LVLNUM_IO] = "I/O", @@ -448,7 +454,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf if (mem_lvlnum[lvl]) l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); else - l += scnprintf(out + l, sz - l, "L%d", lvl); + l += scnprintf(out + l, sz - l, "Unknown level %d", lvl); l += scnprintf(out + l, sz - l, " %s", hit_miss); return l; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4dff3e925a47..46920ebadfd1 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -14,6 +14,7 @@ #include "pmus.h" #include "print-events.h" #include "smt.h" +#include "tool_pmu.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -297,8 +298,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, struct expr_id_data *val_ptr; /* Don't match events for the wrong hybrid PMU. */ - if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) && - strcmp(ev->pmu_name, pmu)) + if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) && + strcmp(ev->pmu->name, pmu)) continue; /* * Check for duplicate events with the same name. For @@ -673,20 +674,20 @@ static int metricgroup__build_event_string(struct strbuf *events, struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_tool_events = false; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->pkey; - enum perf_tool_event ev; + enum tool_pmu_event ev; pr_debug("found event %s\n", id); /* Always move tool events outside of the group. */ - ev = perf_tool_event__from_str(id); - if (ev != PERF_TOOL_NONE) { + ev = tool_pmu__str_to_event(id); + if (ev != TOOL_PMU__EVENT_NONE) { has_tool_events = true; tool_events[ev] = true; continue; @@ -754,14 +755,14 @@ static int metricgroup__build_event_string(struct strbuf *events, if (has_tool_events) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { if (!no_group) { ret = strbuf_addch(events, ','); RETURN_IF_NON_ZERO(ret); } no_group = false; - ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + ret = strbuf_addstr(events, tool_pmu__event_to_str(i)); RETURN_IF_NON_ZERO(ret); } } @@ -1147,14 +1148,14 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(left->pctx, tool_pmu__event_to_str(i), &data)) left_count--; } right_count = hashmap__size(right->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(right->pctx, tool_pmu__event_to_str(i), &data)) right_count--; } @@ -1374,18 +1375,18 @@ static void metricgroup__free_metrics(struct list_head *metric_list) * to true if tool event is found. */ static void find_tool_events(const struct list_head *metric_list, - bool tool_events[PERF_TOOL_MAX]) + bool tool_events[TOOL_PMU__EVENT_MAX]) { struct metric *m; list_for_each_entry(m, metric_list, nd) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { struct expr_id_data *data; if (!tool_events[i] && - !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + !expr__get_id(m->pctx, tool_pmu__event_to_str(i), &data)) tool_events[i] = true; } } @@ -1446,7 +1447,7 @@ err_out: */ static int parse_ids(bool metric_no_merge, bool fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool group_events, const bool tool_events[PERF_TOOL_MAX], + bool group_events, const bool tool_events[TOOL_PMU__EVENT_MAX], struct evlist **out_evlist) { struct parse_events_error parse_error; @@ -1471,9 +1472,9 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu, * event1 if #smt_on else 0 * Add a tool event to avoid a parse error on an empty string. */ - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { - char *tmp = strdup(perf_tool_event__to_str(i)); + char *tmp = strdup(tool_pmu__event_to_str(i)); if (!tmp) return -ENOMEM; @@ -1535,7 +1536,7 @@ static int parse_groups(struct evlist *perf_evlist, struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; bool is_default = !strcmp(str, "Default"); int ret; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9a8be1e46d67..afeb8d815bbf 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -228,7 +228,7 @@ __add_event(struct list_head *list, int *idx, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, - struct perf_cpu_map *cpu_list) + struct perf_cpu_map *cpu_list, u64 alternate_hw_config) { struct evsel *evsel; struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list; @@ -263,7 +263,7 @@ __add_event(struct list_head *list, int *idx, evsel->core.is_pmu_core = pmu ? pmu->is_core : false; evsel->auto_merge_stats = auto_merge_stats; evsel->pmu = pmu; - evsel->pmu_name = pmu ? strdup(pmu->name) : NULL; + evsel->alternate_hw_config = alternate_hw_config; if (name) evsel->name = strdup(name); @@ -286,47 +286,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL); + /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, - const char *metric_id, struct list_head *config_terms) + const char *metric_id, struct list_head *config_terms, + u64 alternate_hw_config) { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; -} - -static int add_event_tool(struct list_head *list, int *idx, - enum perf_tool_event tool_event) -{ - struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_DUMMY, - }; - struct perf_cpu_map *cpu_list = NULL; - - if (tool_event == PERF_TOOL_DURATION_TIME) { - /* Duration time is gathered globally, pretend it is only on CPU0. */ - cpu_list = perf_cpu_map__new("0"); - } - evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, /*pmu=*/NULL, - /*config_terms=*/NULL, /*auto_merge_stats=*/false, - cpu_list); - perf_cpu_map__put(cpu_list); - if (!evsel) - return -ENOMEM; - evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME - || tool_event == PERF_TOOL_USER_TIME - || tool_event == PERF_TOOL_SYSTEM_TIME) { - free((char *)evsel->unit); - evsel->unit = strdup("ns"); - } - return 0; + /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + alternate_hw_config) ? 0 : -ENOMEM; } /** @@ -450,7 +422,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats); + bool auto_merge_stats, u64 alternate_hw_config); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, @@ -476,7 +448,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, */ ret = parse_events_add_pmu(parse_state, list, pmu, parsed_terms, - perf_pmu__auto_merge_stats(pmu)); + perf_pmu__auto_merge_stats(pmu), + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); if (ret) return ret; continue; @@ -507,7 +480,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) == NULL) + /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) return -ENOMEM; free_config_terms(&config_terms); @@ -772,7 +746,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, name = get_config_name(head_config); return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL, - &config_terms); + &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int check_type_val(struct parse_events_term *term, @@ -794,7 +768,7 @@ static int check_type_val(struct parse_events_term *term, static bool config_term_shrinked; -static const char *config_term_name(enum parse_events__term_type term_type) +const char *parse_events__term_type_str(enum parse_events__term_type term_type) { /* * Update according to parse-events.l @@ -880,7 +854,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", - config_term_name(term_type)) >= 0) + parse_events__term_type_str(term_type)) >= 0) parse_events_error__handle(err, -1, err_str, NULL); return false; } @@ -1004,7 +978,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_HARDWARE: default: parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), + strdup(parse_events__term_type_str(term->type_term)), parse_events_formats_error_string(NULL)); return -EINVAL; } @@ -1072,6 +1046,7 @@ static int config_term_pmu(struct perf_event_attr *attr, if (perf_pmu__have_event(pmu, term->config)) { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; term->no_value = true; + term->alternate_hw_config = true; } else { attr->type = PERF_TYPE_HARDWARE; attr->config = term->val.num; @@ -1127,8 +1102,9 @@ static int config_term_tracepoint(struct perf_event_attr *attr, default: if (err) { parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), - strdup("valid terms: call-graph,stack-size\n")); + strdup(parse_events__term_type_str(term->type_term)), + strdup("valid terms: call-graph,stack-size\n") + ); } return -EINVAL; } @@ -1384,8 +1360,9 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) ? 0 : -ENOMEM; + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX + ) == NULL ? -ENOMEM : 0; free_config_terms(&config_terms); return ret; } @@ -1421,13 +1398,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, type, /*extended_type=*/0, config, head_config); } -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event) -{ - return add_event_tool(list, &parse_state->idx, tool_event); -} - static bool config_term_percore(struct list_head *config_terms) { struct evsel_config_term *term; @@ -1443,7 +1413,7 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats) + bool auto_merge_stats, u64 alternate_hw_config) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -1480,7 +1450,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, /*config_terms=*/NULL, auto_merge_stats, - /*cpu_list=*/NULL); + /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 0 : -ENOMEM; } @@ -1501,7 +1471,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /* Look for event names in the terms and rewrite into format based terms. */ if (perf_pmu__check_alias(pmu, &parsed_terms, - &info, &alias_rewrote_terms, err)) { + &info, &alias_rewrote_terms, + &alternate_hw_config, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1537,7 +1508,9 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -ENOMEM; } - if (perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) { + /* Skip configuring hard coded terms that were applied by config_attr. */ + if (perf_pmu__config(pmu, &attr, &parsed_terms, /*apply_hardcoded=*/false, + parse_state->error)) { free_config_terms(&config_terms); parse_events_terms__exit(&parsed_terms); return -EINVAL; @@ -1546,7 +1519,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL); + &config_terms, auto_merge_stats, /*cpu_list=*/NULL, + alternate_hw_config); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; @@ -1567,7 +1541,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, + const char *event_name, u64 hw_config, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc_) { @@ -1620,7 +1594,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, auto_merge_stats = perf_pmu__auto_merge_stats(pmu); if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, auto_merge_stats)) { + &parsed_terms, auto_merge_stats, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1633,7 +1607,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, - /*auto_merge_stats=*/true)) { + /*auto_merge_stats=*/true, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1674,13 +1648,15 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Attempt to add to list assuming event_or_pmu is a PMU name. */ pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - /*auto_merge_stats=*/false)) + /*auto_merge_stats=*/false, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), const_parsed_terms, - /*auto_merge_stats=*/false)) + /*auto_merge_stats=*/false, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; } @@ -1693,7 +1669,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state if (!parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - auto_merge_stats)) { + auto_merge_stats, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { ok++; parse_state->wild_card_pmus = true; } @@ -1704,7 +1681,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Failure to add, assume event_or_pmu is an event name. */ zfree(listp); - if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, const_parsed_terms, listp, loc)) + if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX, + const_parsed_terms, listp, loc)) return 0; if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0) @@ -1755,14 +1733,10 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, int exclude = eu | ek | eh; int exclude_GH = group ? evsel->exclude_GH : 0; - if (mod.precise) { - /* use of precise requires exclude_guest */ - eG = 1; - } if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest) + if (!exclude_GH && !perf_guest && exclude_GH_default) eG = 1; eu = 0; } @@ -2566,7 +2540,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config ? : strdup(config_term_name(type_term)), + .config = config ? : strdup(parse_events__term_type_str(type_term)), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? loc_val->first_column : 0, @@ -2600,7 +2574,7 @@ int parse_events_term__term(struct parse_events_term **term, void *loc_term, void *loc_val) { return parse_events_term__str(term, term_lhs, NULL, - strdup(config_term_name(term_rhs)), + strdup(parse_events__term_type_str(term_rhs)), loc_term, loc_val); } @@ -2707,7 +2681,8 @@ int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct if (ret < 0) return ret; } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) { - ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term)); + ret = strbuf_addf(sb, "%s=", + parse_events__term_type_str(term->type_term)); if (ret < 0) return ret; } @@ -2727,7 +2702,7 @@ static void config_terms_list(char *buf, size_t buf_sz) buf[0] = '\0'; for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { - const char *name = config_term_name(i); + const char *name = parse_events__term_type_str(i); if (!config_term_avail(i, NULL)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 10cc9c433116..3f4334ec6231 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -127,6 +127,12 @@ struct parse_events_term { * value is assumed to be 1. An event name also has no value. */ bool no_value; + /** + * @alternate_hw_config: config is the event name but num is an + * alternate PERF_TYPE_HARDWARE config value which is often nice for the + * sake of quick matching. + */ + bool alternate_hw_config; }; struct parse_events_error { @@ -162,6 +168,8 @@ struct parse_events_state { bool wild_card_pmus; }; +const char *parse_events__term_type_str(enum parse_events__term_type term_type); + bool parse_events__filter_pmu(const struct parse_events_state *parse_state, const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); @@ -221,9 +229,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, u32 type, u64 config, const struct parse_events_terms *head_config, bool wildcard); -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, struct parse_events_terms *parsed_terms); @@ -238,7 +243,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, + const char *event_name, u64 hw_config, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5a0bcd7f166a..14e5bd856a18 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -121,14 +121,6 @@ static int sym(yyscan_t scanner, int type, int config) return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; } -static int tool(yyscan_t scanner, enum perf_tool_event event) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = event; - return PE_VALUE_SYM_TOOL; -} - static int term(yyscan_t scanner, enum parse_events__term_type type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -404,9 +396,6 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } -user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } -system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index b3c51f06cbdc..f888cbb076d6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -56,7 +56,6 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM -%token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME %token PE_RAW PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH @@ -68,7 +67,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW -%type <num> PE_VALUE_SYM_TOOL %type <mod> PE_MODIFIER_EVENT %type <term_type> PE_TERM %type <num> value_sym @@ -292,7 +290,7 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1); + err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1); if (err < 0) { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -350,20 +348,6 @@ value_sym sep_slash_slash_dc PE_ABORT(err); $$ = list; } -| -PE_VALUE_SYM_TOOL sep_slash_slash_dc -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_tool(_parse_state, list, $1); - if (err) - YYNOMEM; - $$ = list; -} event_legacy_cache: PE_LEGACY_CACHE opt_event_config diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 5ccfe4b64cdf..0dacc133ed39 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -233,7 +233,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, } if (is_libpfm_event_supported(name, cpus, threads)) { - print_cb->print_event(print_state, pinfo->name, topic, + print_cb->print_event(print_state, topic, pinfo->name, name, info->equiv, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", @@ -267,8 +267,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, continue; print_cb->print_event(print_state, - pinfo->name, topic, + pinfo->name, name, /*alias=*/NULL, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 61bdda01a05a..08a9d0bd9301 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,7 +18,9 @@ #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "hwmon_pmu.h" #include "pmus.h" +#include "tool_pmu.h" #include <util/pmu-bison.h> #include <util/pmu-flex.h> #include "parse-events.h" @@ -817,31 +819,6 @@ static int is_sysfs_pmu_core(const char *name) return file_available(path); } -char *perf_pmu__getcpuid(struct perf_pmu *pmu) -{ - char *cpuid; - static bool printed; - - cpuid = getenv("PERF_CPUID"); - if (cpuid) - cpuid = strdup(cpuid); - if (!cpuid) - cpuid = get_cpuid_str(pmu); - if (!cpuid) - return NULL; - - if (!printed) { - pr_debug("Using CPUID %s\n", cpuid); - printed = true; - } - return cpuid; -} - -__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) -{ - return perf_pmu__find_metrics_table(NULL); -} - /** * Return the length of the PMU name not including the suffix for uncore PMUs. * @@ -1168,7 +1145,7 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm return pmu; } -static bool perf_pmu__is_fake(const struct perf_pmu *pmu) +bool perf_pmu__is_fake(const struct perf_pmu *pmu) { return pmu->type == PERF_PMU_TYPE_FAKE; } @@ -1366,7 +1343,8 @@ static int pmu_config_term(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_terms *head_terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct perf_pmu_format *format; __u64 *vp; @@ -1380,11 +1358,46 @@ static int pmu_config_term(const struct perf_pmu *pmu, return 0; /* - * Hardcoded terms should be already in, so nothing - * to be done for them. + * Hardcoded terms are generally handled in event parsing, which + * traditionally have had to handle not having a PMU. An alias may + * have hard coded config values, optionally apply them below. */ - if (parse_events__is_hardcoded_term(term)) + if (parse_events__is_hardcoded_term(term)) { + /* Config terms set all bits in the config. */ + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + + if (!apply_hardcoded) + return 0; + + bitmap_fill(bits, PERF_PMU_FORMAT_BITS); + + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config1, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config2, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config3, zero); + break; + case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ + return -EINVAL; + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: + /* Skip non-config terms. */ + break; + default: + break; + } return 0; + } format = pmu_find_format(&pmu->format, term->config); if (!format) { @@ -1466,13 +1479,12 @@ static int pmu_config_term(const struct perf_pmu *pmu, if (err) { char *err_str; - parse_events_error__handle(err, term->err_val, - asprintf(&err_str, - "value too big for format (%s), maximum is %llu", - format->name, (unsigned long long)max_val) < 0 - ? strdup("value too big for format") - : err_str, - NULL); + if (asprintf(&err_str, + "value too big for format (%s), maximum is %llu", + format->name, (unsigned long long)max_val) < 0) { + err_str = strdup("value too big for format"); + } + parse_events_error__handle(err, term->err_val, err_str, /*help=*/NULL); return -EINVAL; } /* @@ -1488,12 +1500,16 @@ static int pmu_config_term(const struct perf_pmu *pmu, int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct parse_events_term *term; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__config_terms(pmu, attr, terms, err); + list_for_each_entry(term, &terms->terms, list) { - if (pmu_config_term(pmu, attr, term, terms, zero, err)) + if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err)) return -EINVAL; } @@ -1507,6 +1523,7 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, */ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *err) { bool zero = !!pmu->perf_event_attr_init_default; @@ -1515,7 +1532,7 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, if (perf_pmu__is_fake(pmu)) return 0; - return perf_pmu__config_terms(pmu, attr, head_terms, zero, err); + return perf_pmu__config_terms(pmu, attr, head_terms, zero, apply_hardcoded, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, @@ -1606,7 +1623,7 @@ static int check_info_data(struct perf_pmu *pmu, */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err) + u64 *alternate_hw_config, struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; @@ -1623,6 +1640,11 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->scale = 0.0; info->snapshot = false; + if (perf_pmu__is_hwmon(pmu)) { + ret = hwmon_pmu__check_alias(head_terms, info, err); + goto out; + } + /* Fake PMU doesn't rewrite terms. */ if (perf_pmu__is_fake(pmu)) goto out; @@ -1638,6 +1660,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) @@ -1646,6 +1669,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (alias->per_pkg) info->per_pkg = true; + if (term->alternate_hw_config) + *alternate_hw_config = term->val.num; + list_del_init(&term->list); parse_events_term__delete(term); } @@ -1790,6 +1816,10 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { if (!name) return false; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name)) + return false; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__have_event(pmu, name); if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) @@ -1801,6 +1831,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) { size_t nr; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__num_events(pmu); + pmu_aliases_parse(pmu); nr = pmu->sysfs_aliases + pmu->sys_json_aliases; @@ -1811,6 +1844,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) else assert(pmu->cpu_json_aliases == 0); + if (perf_pmu__is_tool(pmu)) + nr -= tool_pmu__num_skip_events(); + return pmu->selectable ? nr + 1 : nr; } @@ -1861,12 +1897,18 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, int ret = 0; struct strbuf sb; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__for_each_event(pmu, state, cb); + strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { size_t buf_used, pmu_name_len; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) + continue; + info.pmu_name = event->pmu_name ?: pmu->name; pmu_name_len = pmu_deduped_name_len(pmu, info.pmu_name, skip_duplicate_pmus); @@ -1949,6 +1991,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu) case PERF_TYPE_HW_CACHE: return false; case PERF_TYPE_RAW: return false; case PERF_TYPE_BREAKPOINT: return true; + case PERF_PMU_TYPE_TOOL: return true; default: break; } for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) { @@ -2200,11 +2243,6 @@ bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok) (need_fnmatch && !fnmatch(tok, name, 0)); } -double __weak perf_pmu__cpu_slots_per_cycle(void) -{ - return NAN; -} - int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) { const char *sysfs = sysfs__mountpoint(); @@ -2257,6 +2295,9 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (perf_pmu__is_hwmon(pmu)) + hwmon_pmu__exit(pmu); + perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); perf_pmu__del_caps(pmu); @@ -2280,7 +2321,9 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { struct perf_event_attr attr = {.config = 0,}; - int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL); + + int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true, + /*err=*/NULL); if (ret == 0 && config == attr.config) return event->name; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 4397c48ad569..dbed6c243a5e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -37,6 +37,9 @@ struct perf_pmu_caps { }; enum { + PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, + PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, + PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, PERF_PMU_TYPE_FAKE = 0xFFFFFFFF, }; @@ -169,6 +172,10 @@ struct perf_pmu { * exclude_host. */ bool exclude_guest; + /** + * @checked: Are the missing features checked? + */ + bool checked; } missing_features; /** @@ -206,16 +213,18 @@ typedef int (*pmu_format_callback)(void *state, const char *name, int config, void pmu_add_sys_aliases(struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *error); int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *error); + bool zero, bool apply_hardcoded, + struct parse_events_error *error); __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err); + u64 *alternate_hw_config, struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); void perf_pmu_format__set_value(void *format, int config, unsigned long *bits); @@ -253,8 +262,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu); void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table); -char *perf_pmu__getcpuid(struct perf_pmu *pmu); -const struct pmu_metrics_table *pmu_metrics_table__find(void); bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); @@ -268,7 +275,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok); -double perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename); @@ -280,6 +286,8 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__find_core_pmu(void); + const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); +bool perf_pmu__is_fake(const struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 52109af5f2f1..b493da0d22ef 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -15,6 +15,8 @@ #include "evsel.h" #include "pmus.h" #include "pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "print-events.h" #include "strbuf.h" @@ -200,6 +202,7 @@ static void pmu_read_sysfs(bool core_only) int fd; DIR *dir; struct dirent *dent; + struct perf_pmu *tool_pmu; if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) return; @@ -229,6 +232,11 @@ static void pmu_read_sysfs(bool core_only) pr_err("Failure to set up any core PMUs\n"); } list_sort(NULL, &core_pmus, pmus_cmp); + if (!core_only) { + tool_pmu = perf_pmus__tool_pmu(); + list_add_tail(&tool_pmu->list, &other_pmus); + perf_pmus__read_hwmon_pmus(&other_pmus); + } list_sort(NULL, &other_pmus, pmus_cmp); if (!list_empty(&core_pmus)) { read_sysfs_core_pmus = true; @@ -434,6 +442,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate, pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name); return 1; } + assert(info->pmu != NULL || info->name != NULL); s = &state->aliases[state->index]; s->pmu = info->pmu; #define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL @@ -494,8 +503,8 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p goto free; print_cb->print_event(print_state, - aliases[j].pmu_name, aliases[j].topic, + aliases[j].pmu_name, aliases[j].name, aliases[j].alias, aliases[j].scale_unit, @@ -724,6 +733,13 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name) return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true); } +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, + const char *sysfs_name, + const char *name) +{ + return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name); +} + struct perf_pmu *perf_pmus__fake_pmu(void) { static struct perf_pmu fake = { diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index e1742b56eec7..a0cb0eb2ff97 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -30,6 +30,9 @@ bool perf_pmus__supports_extended_type(void); char *perf_pmus__default_pmu_name(void); struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, + const char *sysfs_name, + const char *name); struct perf_pmu *perf_pmus__fake_pmu(void); #endif /* __PMUS_H */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 81e0135cddf0..a786cbfb0ff5 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -29,6 +29,7 @@ #include "tracepoint.h" #include "pfm.h" #include "thread_map.h" +#include "tool_pmu.h" #include "util.h" #define MAX_NAME_LEN 100 @@ -43,21 +44,6 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; -static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { - [PERF_TOOL_DURATION_TIME] = { - .symbol = "duration_time", - .alias = "", - }, - [PERF_TOOL_USER_TIME] = { - .symbol = "user_time", - .alias = "", - }, - [PERF_TOOL_SYSTEM_TIME] = { - .symbol = "system_time", - .alias = "", - }, -}; - /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -341,24 +327,6 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta return 0; } -void print_tool_events(const struct print_callbacks *print_cb, void *print_state) -{ - // Start at 1 because the first enum entry means no tool event. - for (int i = 1; i < PERF_TOOL_MAX; ++i) { - print_cb->print_event(print_state, - "tool", - /*pmu_name=*/NULL, - event_symbols_tool[i].symbol, - event_symbols_tool[i].alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tool event", - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } -} - void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max) @@ -422,8 +390,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(print_cb, print_state); - print_hwcache_events(print_cb, print_state); perf_pmus__print_pmu_events(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index bf4290bef0cd..445efa1636c1 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -36,7 +36,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max); -void print_tool_events(const struct print_callbacks *print_cb, void *print_state); void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a17c9b8a7a79..eaa0318e9b87 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -40,6 +40,7 @@ #include "session.h" #include "string2.h" #include "strbuf.h" +#include "parse-events.h" #include <subcmd/pager.h> #include <linux/ctype.h> @@ -51,6 +52,9 @@ #define PERFPROBE_GROUP "probe" +/* Defined in kernel/trace/trace.h */ +#define MAX_EVENT_NAME_LEN 64 + bool probe_event_dry_run; /* Dry run flag */ struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM }; @@ -342,7 +346,7 @@ elf_err: return mod_name; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) { @@ -1036,6 +1040,17 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) return rv; } +static int sprint_line_description(char *sbuf, size_t size, struct line_range *lr) +{ + if (!lr->function) + return snprintf(sbuf, size, "file: %s, line: %d", lr->file, lr->start); + + if (lr->file) + return snprintf(sbuf, size, "function: %s, file:%s, line: %d", lr->function, lr->file, lr->start); + + return snprintf(sbuf, size, "function: %s, line:%d", lr->function, lr->start); +} + #define show_one_line_with_num(f,l) _show_one_line(f,l,false,true) #define show_one_line(f,l) _show_one_line(f,l,false,false) #define skip_one_line(f,l) _show_one_line(f,l,true,false) @@ -1065,9 +1080,12 @@ static int __show_line_range(struct line_range *lr, const char *module, ret = debuginfo__find_line_range(dinfo, lr); if (!ret) { /* Not found, retry with an alternative */ + pr_debug2("Failed to find line range in debuginfo. Fallback to alternative\n"); ret = get_alternative_line_range(dinfo, lr, module, user); if (!ret) ret = debuginfo__find_line_range(dinfo, lr); + else /* Ignore error, we just failed to find it. */ + ret = -ENOENT; } if (dinfo->build_id) { build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); @@ -1075,7 +1093,8 @@ static int __show_line_range(struct line_range *lr, const char *module, } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { - pr_warning("Specified source line is not found.\n"); + sprint_line_description(sbuf, sizeof(sbuf), lr); + pr_warning("Specified source line(%s) is not found.\n", sbuf); return -ENOENT; } else if (ret < 0) { pr_warning("Debuginfo analysis failed.\n"); @@ -1250,7 +1269,7 @@ out: return ret; } -#else /* !HAVE_DWARF_SUPPORT */ +#else /* !HAVE_LIBDW_SUPPORT */ static void debuginfo_cache__exit(void) { @@ -1343,30 +1362,39 @@ static bool is_c_func_name(const char *name) * * SRC[:SLN[+NUM|-ELN]] * FNC[@SRC][:SLN[+NUM|-ELN]] + * + * FNC@SRC accepts `FNC@*` which forcibly specify FNC as function name. + * SRC and FUNC can be quoted by double/single quotes. */ int parse_line_range_desc(const char *arg, struct line_range *lr) { - char *range, *file, *name = strdup(arg); - int err; + char *buf = strdup(arg); + char *p; + int err = 0; - if (!name) + if (!buf) return -ENOMEM; lr->start = 0; lr->end = INT_MAX; - range = strchr(name, ':'); - if (range) { - *range++ = '\0'; + p = strpbrk_esq(buf, ":"); + if (p) { + if (p == buf) { + semantic_error("No file/function name in '%s'.\n", p); + err = -EINVAL; + goto err; + } + *(p++) = '\0'; - err = parse_line_num(&range, &lr->start, "start line"); + err = parse_line_num(&p, &lr->start, "start line"); if (err) goto err; - if (*range == '+' || *range == '-') { - const char c = *range++; + if (*p == '+' || *p == '-') { + const char c = *(p++); - err = parse_line_num(&range, &lr->end, "end line"); + err = parse_line_num(&p, &lr->end, "end line"); if (err) goto err; @@ -1390,34 +1418,41 @@ int parse_line_range_desc(const char *arg, struct line_range *lr) " than end line.\n"); goto err; } - if (*range != '\0') { - semantic_error("Tailing with invalid str '%s'.\n", range); + if (*p != '\0') { + semantic_error("Tailing with invalid str '%s'.\n", p); goto err; } } - file = strchr(name, '@'); - if (file) { - *file = '\0'; - lr->file = strdup(++file); - if (lr->file == NULL) { - err = -ENOMEM; + p = strpbrk_esq(buf, "@"); + if (p) { + *p++ = '\0'; + if (strcmp(p, "*")) { + lr->file = strdup_esq(p); + if (lr->file == NULL) { + err = -ENOMEM; + goto err; + } + } + if (*buf != '\0') + lr->function = strdup_esq(buf); + if (!lr->function && !lr->file) { + semantic_error("Only '@*' is not allowed.\n"); + err = -EINVAL; goto err; } - lr->function = name; - } else if (strchr(name, '/') || strchr(name, '.')) - lr->file = name; - else if (is_c_func_name(name))/* We reuse it for checking funcname */ - lr->function = name; + } else if (strpbrk_esq(buf, "/.")) + lr->file = strdup_esq(buf); + else if (is_c_func_name(buf))/* We reuse it for checking funcname */ + lr->function = strdup_esq(buf); else { /* Invalid name */ - semantic_error("'%s' is not a valid function name.\n", name); + semantic_error("'%s' is not a valid function name.\n", buf); err = -EINVAL; goto err; } - return 0; err: - free(name); + free(buf); return err; } @@ -1425,19 +1460,19 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strpbrk_esc(*arg, ":"); + ptr = strpbrk_esq(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup_esc(*arg); + pev->group = strdup_esq(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - pev->event = strdup_esc(*arg); + pev->event = strdup_esq(*arg); if (pev->event == NULL) return -ENOMEM; @@ -1476,7 +1511,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk_esc(arg, ";=@+%"); + ptr = strpbrk_esq(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1490,7 +1525,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup_esc(ptr + 1); + pev->target = strdup_esq(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1531,7 +1566,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) file_spec = true; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1540,7 +1575,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup_esc(arg); + tmp = strdup_esq(arg); if (tmp == NULL) return -ENOMEM; } @@ -1578,7 +1613,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -ENOMEM; break; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1605,7 +1640,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup_esc(arg); + if (!strcmp(arg, "*")) + break; + pp->file = strdup_esq(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2757,7 +2794,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, /* Try no suffix number */ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { - pr_warning("snprintf() failed: %d; the event name nbase='%s' is too long\n", ret, nbase); + pr_warning("snprintf() failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2777,7 +2817,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, for (i = 1; i < MAX_EVENT_INDEX; i++) { ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { - pr_debug("snprintf() failed: %d\n", ret); + pr_warning("Add suffix failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2841,7 +2884,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, bool allow_suffix) { const char *event, *group; - char buf[64]; + char buf[MAX_EVENT_NAME_LEN]; int ret; /* If probe_event or trace_event already have the name, reuse it */ @@ -2865,6 +2908,12 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, else group = PERFPROBE_GROUP; + if (strlen(group) >= MAX_EVENT_NAME_LEN) { + pr_err("Probe group string='%s' is too long (>= %d bytes)\n", + group, MAX_EVENT_NAME_LEN); + return -ENOMEM; + } + /* Get an unused new event name */ ret = get_new_event_name(buf, sizeof(buf), event, namelist, tev->point.retprobe, allow_suffix); @@ -3705,59 +3754,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) } } -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) -{ - int ret; - - ret = init_probe_symbol_maps(pevs->uprobes); - if (ret < 0) - return ret; - - ret = convert_perf_probe_events(pevs, npevs); - if (ret == 0) - ret = apply_perf_probe_events(pevs, npevs); - - cleanup_perf_probe_events(pevs, npevs); - - exit_probe_symbol_maps(); - return ret; -} - -int del_perf_probe_events(struct strfilter *filter) -{ - int ret, ret2, ufd = -1, kfd = -1; - char *str = strfilter__string(filter); - - if (!str) - return -EINVAL; - - /* Get current event names */ - ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); - if (ret < 0) - goto out; - - ret = probe_file__del_events(kfd, filter); - if (ret < 0 && ret != -ENOENT) - goto error; - - ret2 = probe_file__del_events(ufd, filter); - if (ret2 < 0 && ret2 != -ENOENT) { - ret = ret2; - goto error; - } - ret = 0; - -error: - if (kfd >= 0) - close(kfd); - if (ufd >= 0) - close(ufd); -out: - free(str); - - return ret; -} - int show_available_funcs(const char *target, struct nsinfo *nsi, struct strfilter *_filter, bool user) { diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 7e3b6c3d1f74..61a5f4ff4e9c 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -159,7 +159,6 @@ void line_range__clear(struct line_range *lr); /* Initialize line range */ int line_range__init(struct line_range *lr); -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); @@ -168,8 +167,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); struct strfilter; -int del_perf_probe_events(struct strfilter *filter); - int show_perf_probe_event(const char *group, const char *event, struct perf_probe_event *pev, const char *module, bool use_stdout); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 3d50de3217d5..ec8ac242fedb 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -366,25 +366,6 @@ int probe_file__del_strlist(int fd, struct strlist *namelist) return ret; } -int probe_file__del_events(int fd, struct strfilter *filter) -{ - struct strlist *namelist; - int ret; - - namelist = strlist__new(NULL, NULL); - if (!namelist) - return -ENOMEM; - - ret = probe_file__get_events(fd, filter, namelist); - if (ret < 0) - goto out; - - ret = probe_file__del_strlist(fd, namelist); -out: - strlist__delete(namelist); - return ret; -} - /* Caller must ensure to remove this entry from list */ static void probe_cache_entry__delete(struct probe_cache_entry *entry) { diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 0dba88c0f5f0..c2bb6a5b9dcc 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -44,7 +44,6 @@ struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); -int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); int probe_file__del_strlist(int fd, struct strlist *namelist); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 630e16c54ed5..7f2ee0cb43ca 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -56,7 +56,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Op *fb_ops, Dwarf_Die *sp_die, - unsigned int machine, + const struct probe_finder *pf, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; @@ -166,7 +166,7 @@ static_var: if (!tvar) return ret2; - regs = get_dwarf_regstr(regn, machine); + regs = get_dwarf_regstr(regn, pf->e_machine, pf->e_flags); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " @@ -451,7 +451,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - &pf->sp_die, pf->machine, pf->tvar); + &pf->sp_die, pf, pf->tvar); if (ret == -ENOENT && pf->skip_empty_arg) /* This can be found in other place. skip it */ return 0; @@ -602,7 +602,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -613,7 +612,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1136,9 +1134,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (gelf_getehdr(elf, &ehdr) == NULL) return -EINVAL; - pf->machine = ehdr.e_machine; + pf->e_machine = ehdr.e_machine; + pf->e_flags = ehdr.e_flags; -#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1148,7 +1146,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1175,7 +1172,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) (tag == DW_TAG_variable && vf->vars)) { if (convert_variable_location(die_mem, vf->pf->addr, vf->pf->fb_ops, &pf->sp_die, - pf->machine, NULL) == 0) { + pf, /*tvar=*/NULL) == 0) { vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); if (vf->args[vf->nargs].var == NULL) { vf->ret = -ENOMEM; @@ -1379,6 +1376,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, if (ret >= 0 && tf.pf.skip_empty_arg) ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs); + dwarf_cfi_end(tf.pf.cfi_eh); + if (ret < 0 || tf.ntevs == 0) { for (i = 0; i < tf.ntevs; i++) clear_probe_trace_event(&tf.tevs[i]); @@ -1404,7 +1403,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) tag == DW_TAG_variable) { ret = convert_variable_location(die_mem, af->pf.addr, af->pf.fb_ops, &af->pf.sp_die, - af->pf.machine, NULL); + &af->pf, /*tvar=*/NULL); if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; @@ -1583,8 +1582,21 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, /* Find a corresponding function (name, baseline and baseaddr) */ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) { - /* Get function entry information */ - func = basefunc = dwarf_diename(&spdie); + /* + * Get function entry information. + * + * As described in the document DWARF Debugging Information + * Format Version 5, section 2.22 Linkage Names, "mangled names, + * are used in various ways, ... to distinguish multiple + * entities that have the same name". + * + * Firstly try to get distinct linkage name, if fail then + * rollback to get associated name in DIE. + */ + func = basefunc = die_get_linkage_name(&spdie); + if (!func) + func = basefunc = dwarf_diename(&spdie); + if (!func || die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { @@ -1863,7 +1875,11 @@ int find_source_path(const char *raw_path, const char *sbuild_id, const char *prefix = symbol_conf.source_prefix; if (sbuild_id && !prefix) { - if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + char prefixed_raw_path[PATH_MAX]; + + path__join(prefixed_raw_path, sizeof(prefixed_raw_path), comp_dir, raw_path); + + if (!get_source_from_debuginfod(prefixed_raw_path, sbuild_id, new_path)) return 0; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3add5ff516e1..be7b46ea2460 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -21,7 +21,7 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #include "debuginfo.h" @@ -63,14 +63,13 @@ struct probe_finder { struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ -#if _ELFUTILS_PREREQ(0, 142) - /* Call Frame Information from .eh_frame */ + /* Call Frame Information from .eh_frame. Owned by this struct. */ Dwarf_CFI *cfi_eh; - /* Call Frame Information from .debug_frame */ + /* Call Frame Information from .debug_frame. Not owned. */ Dwarf_CFI *cfi_dbg; -#endif Dwarf_Op *fb_ops; /* Frame base attribute */ - unsigned int machine; /* Target machine arch */ + unsigned int e_machine; /* ELF target machine arch */ + unsigned int e_flags; /* ELF target machine flags */ struct perf_probe_arg *pvar; /* Current target variable */ struct probe_trace_arg *tvar; /* Current result variable */ bool skip_empty_arg; /* Skip non-exist args */ @@ -104,6 +103,6 @@ struct line_finder { int found; }; -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ee3d43a7ba45..2096cdbaa53b 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,7 +6,7 @@ #include <linux/err.h> #include <perf/cpumap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include <perf/mmap.h> #include "evlist.h" diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 73846b73d0cf..30638653ad2d 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -345,7 +345,7 @@ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, } color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx\n", + "\t\tC:%d TOD:%#llx\n", pos, te->f ? 'F' : ' ', te->a ? 'A' : ' ', diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 53383e97ec9d..335217bb532b 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -98,12 +98,12 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, te.res2 = be32_to_cpu(tep->res2); color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" - " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + " Cfvn:%d Csvn:%d Speed:%d TOD:%#lx\n", offset, te.clock_base ? 'T' : ' ', te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ', te.cfvn, te.csvn, te.cpu_speed, te.timestamp); - color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#lx" " Type:%x\n\n", te.progusage1, te.progusage2, te.progusage3, te.tod_base, te.mach_type); @@ -205,7 +205,7 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) char *ev_name = get_counter_name(ce.set, i, pmu); color_fprintf(stdout, color, - "\tCounter:%03d %s Value:%#018lx\n", i, + "\tCounter:%03zd %s Value:%#018"PRIx64"\n", i, ev_name ?: "<unknown>", be64_to_cpu(*p)); free(ev_name); } @@ -260,7 +260,7 @@ static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) ev_name = get_counter_name(evsel->core.attr.config, pai_data.event_nr, evsel->pmu); - color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018"PRIx64"\n", pai_data.event_nr, ev_name ?: "<unknown>", pai_data.event_val); free(ev_name); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e16257d5ab2c..85b7f188f729 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,7 +27,7 @@ #include <errno.h> #include <linux/bitmap.h> #include <linux/time64.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <stdbool.h> /* perl needs the following define, right after including stdbool.h */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d7183134b669..8bdae066e839 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -31,7 +31,7 @@ #include <linux/compiler.h> #include <linux/time64.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "../build-id.h" @@ -793,7 +793,8 @@ static int set_regs_in_dict(PyObject *dict, static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *dso_field, const char *dso_bid_field, const char *dso_map_start, const char *dso_map_end, - const char *sym_field, const char *symoff_field) + const char *sym_field, const char *symoff_field, + const char *map_pgoff) { char sbuild_id[SBUILD_ID_SIZE]; @@ -809,6 +810,8 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al, PyLong_FromUnsignedLong(map__start(al->map))); pydict_set_item_string_decref(dict, dso_map_end, PyLong_FromUnsignedLong(map__end(al->map))); + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(map__pgoff(al->map))); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -895,7 +898,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", - "symbol", "symoff"); + "symbol", "symoff", "map_pgoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -920,7 +923,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyBool_FromLong(1)); set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", "addr_dso_map_start", "addr_dso_map_end", - "addr_symbol", "addr_symoff"); + "addr_symbol", "addr_symoff", "addr_map_pgoff"); } if (sample->flags) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbaf07bf6c5f..507e6cba9545 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1171,18 +1171,24 @@ static int deliver_sample_value(struct evlist *evlist, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, - struct machine *machine) + struct machine *machine, + bool per_thread) { struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; + u64 *storage = NULL; if (sid) { + storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread); + } + + if (storage) { sample->id = v->id; - sample->period = v->value - sid->period; - sid->period = v->value; + sample->period = v->value - *storage; + *storage = v->value; } - if (!sid || sid->evsel == NULL) { + if (!storage || sid->evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } @@ -1203,17 +1209,19 @@ static int deliver_sample_group(struct evlist *evlist, union perf_event *event, struct perf_sample *sample, struct machine *machine, - u64 read_format) + u64 read_format, + bool per_thread) { int ret = -EINVAL; struct sample_read_value *v = sample->read.group.values; if (tool->dont_split_sample_group) - return deliver_sample_value(evlist, tool, event, sample, v, machine); + return deliver_sample_value(evlist, tool, event, sample, v, machine, + per_thread); sample_read_group__for_each(v, sample->read.group.nr, read_format) { ret = deliver_sample_value(evlist, tool, event, sample, v, - machine); + machine, per_thread); if (ret) break; } @@ -1228,6 +1236,7 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; u64 read_format = evsel->core.attr.read_format; + bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core); /* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) @@ -1236,10 +1245,11 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample, - machine, read_format); + machine, read_format, per_thread); else return deliver_sample_value(evlist, tool, event, sample, - &sample->read.one, machine); + &sample->read.one, machine, + per_thread); } static int machines__deliver_event(struct machines *machines, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 013020f33ece..9dd60c7869a2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -35,7 +35,7 @@ #include <linux/string.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif regex_t parent_regex; @@ -677,6 +677,102 @@ struct sort_entry sort_sym_ipc_null = { .se_width_idx = HISTC_SYMBOL_IPC, }; +/* --sort callchain_branch_predicted */ + +static int64_t +sort__callchain_branch_predicted_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_predicted_snprintf( + struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + u64 branch_count, predicted_count; + double percent = 0.0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + &predicted_count, NULL, NULL); + + if (branch_count) + percent = predicted_count * 100.0 / branch_count; + + snprintf(str, sizeof(str), "%.1f%%", percent); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_predicted = { + .se_header = "Predicted", + .se_cmp = sort__callchain_branch_predicted_cmp, + .se_snprintf = hist_entry__callchain_branch_predicted_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_PREDICTED, +}; + +/* --sort callchain_branch_abort */ + +static int64_t +sort__callchain_branch_abort_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_abort_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, abort_count; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, &abort_count, NULL); + + snprintf(str, sizeof(str), "%" PRId64, abort_count); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_abort = { + .se_header = "Abort", + .se_cmp = sort__callchain_branch_abort_cmp, + .se_snprintf = hist_entry__callchain_branch_abort_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_ABORT, +}; + +/* --sort callchain_branch_cycles */ + +static int64_t +sort__callchain_branch_cycles_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_cycles_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, cycles_count, cycles = 0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, NULL, &cycles_count); + + if (branch_count) + cycles = cycles_count / branch_count; + + snprintf(str, sizeof(str), "%" PRId64 "", cycles); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_cycles = { + .se_header = "Cycles", + .se_cmp = sort__callchain_branch_cycles_cmp, + .se_snprintf = hist_entry__callchain_branch_cycles_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_CYCLES, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -2456,6 +2552,15 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from), DIM(SORT_ADDR_TO, "addr_to", sort_addr_to), + DIM(SORT_CALLCHAIN_BRANCH_PREDICTED, + "callchain_branch_predicted", + sort_callchain_branch_predicted), + DIM(SORT_CALLCHAIN_BRANCH_ABORT, + "callchain_branch_abort", + sort_callchain_branch_abort), + DIM(SORT_CALLCHAIN_BRANCH_CYCLES, + "callchain_branch_cycles", + sort_callchain_branch_cycles) }; #undef DIM @@ -3484,7 +3589,13 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if ((sort__mode != SORT_MODE__BRANCH) && + strncasecmp(tok, "callchain_branch_predicted", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_abort", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_cycles", + strlen(tok))) return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9ff68c6786e7..a8572574e168 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -88,6 +88,9 @@ enum sort_type { SORT_SYM_IPC, SORT_ADDR_FROM, SORT_ADDR_TO, + SORT_CALLCHAIN_BRANCH_PREDICTED, + SORT_CALLCHAIN_BRANCH_ABORT, + SORT_CALLCHAIN_BRANCH_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ea96e4ebad8c..53dcdf07f5a2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -7,6 +7,7 @@ #include <perf/cpumap.h> #include "color.h" #include "counts.h" +#include "debug.h" #include "evlist.h" #include "evsel.h" #include "stat.h" @@ -21,6 +22,7 @@ #include "iostat.h" #include "pmu.h" #include "pmus.h" +#include "tool_pmu.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -71,6 +73,32 @@ static const char *aggr_header_std[] = { [AGGR_GLOBAL] = "" }; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh) +{ + const char * const colors[] = { + "", /* unknown */ + PERF_COLOR_RED, /* bad */ + PERF_COLOR_MAGENTA, /* nearly bad */ + PERF_COLOR_YELLOW, /* less good */ + PERF_COLOR_GREEN, /* good */ + }; + static_assert(ARRAY_SIZE(colors) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return colors[thresh]; +} + +static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh) +{ + const char * const strs[] = { + "unknown", + "bad", + "nearly bad", + "less good", + "good", + }; + static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return strs[thresh]; +} + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -403,13 +431,14 @@ static void do_new_line_std(struct perf_stat_config *config, } static void print_metric_std(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; int n; bool newline = os->newline; + const char *color = metric_threshold_classify__color(thresh); os->newline = false; @@ -441,7 +470,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) static void print_metric_csv(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -462,15 +491,20 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, static void print_metric_json(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh, const char *fmt __maybe_unused, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - fprintf(out, "\"metric-value\" : \"%f\", ", val); - fprintf(out, "\"metric-unit\" : \"%s\"", unit); + if (unit) { + fprintf(out, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit); + if (thresh != METRIC_THRESHOLD_UNKNOWN) { + fprintf(out, ", \"metric-threshold\" : \"%s\"", + metric_threshold_classify__str(thresh)); + } + } if (!config->metric_only) fprintf(out, "}"); } @@ -557,13 +591,14 @@ static const char *fixunit(char *buf, struct evsel *evsel, } static void print_metric_only(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[1024], str[1024]; unsigned mlen = config->metric_only_len; + const char *color = metric_threshold_classify__color(thresh); if (!valid_only_metric(unit)) return; @@ -580,7 +615,8 @@ static void print_metric_only(struct perf_stat_config *config, } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { @@ -602,25 +638,29 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - char buf[64], *vals, *ends; + char buf[64], *ends; char tbuf[1024]; + const char *vals; if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); + if (!unit[0]) + return; snprintf(buf, sizeof(buf), fmt ?: "", val); - ends = vals = skip_spaces(buf); + vals = ends = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - if (!unit[0] || !vals[0]) - return; + if (!vals[0]) + vals = "none"; fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); os->first = false; } @@ -631,7 +671,8 @@ static void new_line_metric(struct perf_stat_config *config __maybe_unused, } static void print_metric_header(struct perf_stat_config *config, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) { @@ -805,7 +846,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, os, NULL, "", "", 0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, "", "", 0); return; } @@ -860,7 +901,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out, &config->metric_events); } else { - pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/"", /*val=*/0); } if (!config->metric_only) { @@ -871,38 +912,66 @@ static void printout(struct perf_stat_config *config, struct outstate *os, static void uniquify_event_name(struct evsel *counter) { - char *new_name; - char *config; - int ret = 0; + const char *name, *pmu_name; + char *new_name, *config; + int ret; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + /* The evsel has a "name=" config term or is from libpfm. */ + if (counter->use_config_name || counter->is_libpfm_event) + return; + + /* Legacy no PMU event, don't uniquify. */ + if (!counter->pmu || + (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW)) + return; - if (counter->uniquified_name || counter->use_config_name || - !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name, - strlen(counter->pmu_name))) + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX) return; - config = strchr(counter->name, '/'); + name = evsel__name(counter); + pmu_name = counter->pmu->name; + /* Already prefixed by the PMU name. */ + if (!strncmp(name, pmu_name, strlen(pmu_name))) + return; + + config = strchr(name, '/'); if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (evsel__is_hybrid(counter)) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); + int len = config - name; + + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); } else { - ret = asprintf(&new_name, "%s [%s]", - counter->name, counter->pmu_name); + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. */ + int len = config - name; - if (ret) { - free(counter->name); - counter->name = new_name; + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); } } - - counter->uniquified_name = true; + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } } static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) @@ -940,15 +1009,29 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, int idx; /* + * Skip unsupported default events when not verbose. (default events + * are all marked 'skippable'). + */ + if (verbose == 0 && counter->skippable && !counter->supported) + return true; + + /* * Skip value 0 when enabling --per-thread globally, * otherwise it will have too many 0 output. */ if (config->aggr_mode == AGGR_THREAD && config->system_wide) return true; - /* Tool events have the software PMU but are only gathered on 1. */ - if (evsel__is_tool(counter)) - return true; + /* + * Many tool events are only gathered on the first index, skip other + * zero values. + */ + if (evsel__is_tool(counter)) { + struct aggr_cpu_id own_id = + config->aggr_get_id(config, (struct perf_cpu){ .cpu = 0 }); + + return !aggr_cpu_id__equal(id, &own_id); + } /* * Skip value 0 when it's an uncore event and the given aggr id @@ -1559,6 +1642,31 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_metric_end(config, os); } +static void disable_uniquify(struct evlist *evlist) +{ + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; + + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return; + /* Keep hybrid event names uniquified for clarity. */ + if (perf_pmus__num_core_pmus() > 1) + return; + } + } + evlist__for_each_entry_continue(evlist, counter) { + counter->uniquified_name = true; + } +} + void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) @@ -1572,6 +1680,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf .first = true, }; + disable_uniquify(evlist); + if (config->iostat_run) evlist->selected = evlist__first(evlist); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 99376c12dd8e..47718610d5d8 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -15,6 +15,7 @@ #include <linux/zalloc.h> #include "iostat.h" #include "util/hashmap.h" +#include "tool_pmu.h" struct stats walltime_nsecs_stats; struct rusage_stats ru_stats; @@ -76,7 +77,7 @@ void perf_stat__reset_shadow_stats(void) memset(&ru_stats, 0, sizeof(ru_stats)); } -static enum stat_type evsel__stat_type(const struct evsel *evsel) +static enum stat_type evsel__stat_type(struct evsel *evsel) { /* Fake perf_hw_cache_op_id values for use with evsel__match. */ u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | @@ -136,23 +137,19 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel) return STAT_NONE; } -static const char *get_ratio_color(const double ratios[3], double val) +static enum metric_threshold_classify get_ratio_thresh(const double ratios[3], double val) { - const char *color = PERF_COLOR_NORMAL; + assert(ratios[0] > ratios[1]); + assert(ratios[1] > ratios[2]); - if (val > ratios[0]) - color = PERF_COLOR_RED; - else if (val > ratios[1]) - color = PERF_COLOR_MAGENTA; - else if (val > ratios[2]) - color = PERF_COLOR_YELLOW; - - return color; + return val > ratios[1] + ? (val > ratios[0] ? METRIC_THRESHOLD_BAD : METRIC_THRESHOLD_NEARLY_BAD) + : (val > ratios[2] ? METRIC_THRESHOLD_LESS_GOOD : METRIC_THRESHOLD_GOOD); } static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) { - const struct evsel *cur; + struct evsel *cur; int evsel_ctx = evsel_context(evsel); evlist__for_each_entry(evsel->evlist, cur) { @@ -195,17 +192,21 @@ static void print_ratio(struct perf_stat_config *config, const struct evsel *evsel, int aggr_idx, double numerator, struct perf_stat_output_ctx *out, enum stat_type denominator_type, - const double color_ratios[3], const char *unit) + const double thresh_ratios[3], const char *_unit) { double denominator = find_stat(evsel, aggr_idx, denominator_type); + double ratio = 0; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; + const char *fmt = NULL; + const char *unit = NULL; if (numerator && denominator) { - double ratio = numerator / denominator * 100.0; - const char *color = get_ratio_color(color_ratios, ratio); - - out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, unit, 0); + ratio = numerator / denominator * 100.0; + thresh = get_ratio_thresh(thresh_ratios, ratio); + fmt = "%7.2f%%"; + unit = _unit; + } + out->print_metric(config, out->ctx, thresh, fmt, unit, ratio); } static void print_stalled_cycles_front(struct perf_stat_config *config, @@ -213,9 +214,9 @@ static void print_stalled_cycles_front(struct perf_stat_config *config, int aggr_idx, double stalled, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {50.0, 30.0, 10.0}; + const double thresh_ratios[3] = {50.0, 30.0, 10.0}; - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, + print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, "frontend cycles idle"); } @@ -224,9 +225,9 @@ static void print_stalled_cycles_back(struct perf_stat_config *config, int aggr_idx, double stalled, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {75.0, 50.0, 20.0}; + const double thresh_ratios[3] = {75.0, 50.0, 20.0}; - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, + print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, "backend cycles idle"); } @@ -235,9 +236,9 @@ static void print_branch_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, thresh_ratios, "of all branches"); } @@ -246,9 +247,9 @@ static void print_l1d_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, thresh_ratios, "of all L1-dcache accesses"); } @@ -257,9 +258,9 @@ static void print_l1i_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, thresh_ratios, "of all L1-icache accesses"); } @@ -268,9 +269,9 @@ static void print_ll_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, thresh_ratios, "of all LL-cache accesses"); } @@ -279,9 +280,9 @@ static void print_dtlb_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, thresh_ratios, "of all dTLB cache accesses"); } @@ -290,9 +291,9 @@ static void print_itlb_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, thresh_ratios, "of all iTLB cache accesses"); } @@ -301,9 +302,9 @@ static void print_cache_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, thresh_ratios, "of all cache refs"); } @@ -319,15 +320,16 @@ static void print_instructions(struct perf_stat_config *config, find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); if (cycles) { - print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", - instructions / cycles); - } else - print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); - + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", + "insn per cycle", instructions / cycles); + } else { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "insn per cycle", 0); + } if (max_stalled && instructions) { out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", - max_stalled / instructions); + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", + "stalled cycles per insn", max_stalled / instructions); } } @@ -341,9 +343,12 @@ static void print_cycles(struct perf_stat_config *config, if (cycles && nsecs) { double ratio = cycles / nsecs; - out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.3f", + "GHz", ratio); + } else { + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "GHz", 0); + } } static void print_nsecs(struct perf_stat_config *config, @@ -356,10 +361,12 @@ static void print_nsecs(struct perf_stat_config *config, double wall_time = avg_stats(&walltime_nsecs_stats); if (wall_time) { - print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", "CPUs utilized", nsecs / (wall_time * evsel->scale)); - } else - print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); + } else { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "CPUs utilized", 0); + } } static int prepare_metric(const struct metric_expr *mexp, @@ -381,26 +388,35 @@ static int prepare_metric(const struct metric_expr *mexp, double scale; switch (evsel__tool_event(metric_events[i])) { - case PERF_TOOL_DURATION_TIME: + case TOOL_PMU__EVENT_DURATION_TIME: stats = &walltime_nsecs_stats; scale = 1e-9; break; - case PERF_TOOL_USER_TIME: + case TOOL_PMU__EVENT_USER_TIME: stats = &ru_stats.ru_utime_usec_stat; scale = 1e-6; break; - case PERF_TOOL_SYSTEM_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: stats = &ru_stats.ru_stime_usec_stat; scale = 1e-6; break; - case PERF_TOOL_NONE: + case TOOL_PMU__EVENT_NONE: pr_err("Invalid tool event 'none'"); abort(); - case PERF_TOOL_MAX: + case TOOL_PMU__EVENT_MAX: pr_err("Invalid tool event 'max'"); abort(); + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: default: - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); + pr_err("Unexpected tool event '%s'", evsel__name(metric_events[i])); abort(); } val = avg_stats(stats) * scale; @@ -483,7 +499,7 @@ static void generic_metric(struct perf_stat_config *config, double ratio, scale, threshold; int i; void *ctxp = out->ctx; - const char *color = NULL; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; pctx = expr__ctx_new(); if (!pctx) @@ -501,13 +517,13 @@ static void generic_metric(struct perf_stat_config *config, if (!metric_events[i]) { if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; - char metric_bf[64]; + char metric_bf[128]; if (metric_threshold && expr__parse(&threshold, pctx, metric_threshold) == 0 && !isnan(threshold)) { - color = fpclassify(threshold) == FP_ZERO - ? PERF_COLOR_GREEN : PERF_COLOR_RED; + thresh = fpclassify(threshold) == FP_ZERO + ? METRIC_THRESHOLD_GOOD : METRIC_THRESHOLD_BAD; } if (metric_unit && metric_name) { @@ -522,22 +538,22 @@ static void generic_metric(struct perf_stat_config *config, scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); - print_metric(config, ctxp, color, "%8.1f", + print_metric(config, ctxp, thresh, "%8.1f", metric_bf, ratio); } else { - print_metric(config, ctxp, color, "%8.2f", + print_metric(config, ctxp, thresh, "%8.2f", metric_name ? metric_name : out->force_header ? evsel->name : "", ratio); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } @@ -573,7 +589,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, { bool need_full_name = perf_pmus__num_core_pmus() > 1; static const char *last_name; - static const char *last_pmu; + static const struct perf_pmu *last_pmu; char full_name[64]; /* @@ -584,21 +600,21 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, * different metric events. */ if (last_name && !strcmp(last_name, name)) { - if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) { + if (!need_full_name || last_pmu != evsel->pmu) { out->print_metricgroup_header(config, ctxp, NULL); return; } } - if (need_full_name) - scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name); + if (need_full_name && evsel->pmu) + scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name); else scnprintf(full_name, sizeof(full_name), "%s", name); out->print_metricgroup_header(config, ctxp, full_name); last_name = name; - last_pmu = evsel->pmu_name; + last_pmu = evsel->pmu; } /** @@ -708,17 +724,21 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (unit != ' ') snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); - } else + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", + unit_buf, ratio); + } else { num = 0; + } } } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, &num, NULL, out, metric_events); - if (num == 0) - print_metric(config, ctxp, NULL, NULL, NULL, 0); + if (num == 0) { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, + /*fmt=*/NULL, /*unit=*/NULL, 0); + } } /** diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0bd5467389e4..7c2ccdcc3fdb 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -553,7 +553,7 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) return false; - return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); + return evsel_a->pmu != evsel_b->pmu; } static void evsel__merge_aliases(struct evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fd7a187551bd..6f8cff3cd39a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -154,9 +154,21 @@ struct evlist; extern struct stats walltime_nsecs_stats; extern struct rusage_stats ru_stats; +enum metric_threshold_classify { + METRIC_THRESHOLD_UNKNOWN, + METRIC_THRESHOLD_BAD, + METRIC_THRESHOLD_NEARLY_BAD, + METRIC_THRESHOLD_LESS_GOOD, + METRIC_THRESHOLD_GOOD, +}; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh); + typedef void (*print_metric_t)(struct perf_stat_config *config, - void *ctx, const char *color, const char *unit, - const char *fmt, double val); + void *ctx, + enum metric_threshold_classify thresh, + const char *fmt, + const char *unit, + double val); typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); /* Used to print the display name of the Default metricgroup for now. */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 116a642ad99d..308fc7ec88cc 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -263,6 +263,34 @@ char *strpbrk_esc(char *str, const char *stopset) return ptr; } +/* Like strpbrk_esc(), but not break if it is quoted with single/double quotes */ +char *strpbrk_esq(char *str, const char *stopset) +{ + char *_stopset = NULL; + char *ptr; + const char *squote = "'"; + const char *dquote = "\""; + + if (asprintf(&_stopset, "%s%c%c", stopset, *squote, *dquote) < 0) + return NULL; + + do { + ptr = strpbrk_esc(str, _stopset); + if (!ptr) + break; + if (*ptr == *squote) + ptr = strpbrk_esc(ptr + 1, squote); + else if (*ptr == *dquote) + ptr = strpbrk_esc(ptr + 1, dquote); + else + break; + str = ptr + 1; + } while (ptr); + + free(_stopset); + return ptr; +} + /* Like strdup, but do not copy a single backslash */ char *strdup_esc(const char *str) { @@ -293,6 +321,78 @@ char *strdup_esc(const char *str) return ret; } +/* Remove backslash right before quote and return next quote address. */ +static char *remove_consumed_esc(char *str, int len, int quote) +{ + char *ptr = str, *end = str + len; + + while (*ptr != quote && ptr < end) { + if (*ptr == '\\' && *(ptr + 1) == quote) { + memmove(ptr, ptr + 1, end - (ptr + 1)); + /* now *ptr is `quote`. */ + end--; + } + ptr++; + } + + return *ptr == quote ? ptr : NULL; +} + +/* + * Like strdup_esc, but keep quoted string as it is (and single backslash + * before quote is removed). If there is no closed quote, return NULL. + */ +char *strdup_esq(const char *str) +{ + char *d, *ret; + + /* If there is no quote, return normal strdup_esc() */ + d = strpbrk_esc((char *)str, "\"'"); + if (!d) + return strdup_esc(str); + + ret = strdup(str); + if (!ret) + return NULL; + + d = ret; + do { + d = strpbrk(d, "\\\"\'"); + if (!d) + break; + + if (*d == '"' || *d == '\'') { + /* This is non-escaped quote */ + int quote = *d; + int len = strlen(d + 1) + 1; + + /* + * Remove the start quote and remove consumed escape (backslash + * before quote) and remove the end quote. If there is no end + * quote, it is the input error. + */ + memmove(d, d + 1, len); + d = remove_consumed_esc(d, len, quote); + if (!d) + goto error; + memmove(d, d + 1, strlen(d + 1) + 1); + } + if (*d == '\\') { + memmove(d, d + 1, strlen(d + 1) + 1); + if (*d == '\\') { + /* double backslash -- keep the second one. */ + d++; + } + } + } while (*d != '\0'); + + return ret; + +error: + free(ret); + return NULL; +} + unsigned int hex(char c) { if (c >= '0' && c <= '9') diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 52cb8ba057c7..4c8bff47cfd3 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -37,6 +37,8 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); +char *strpbrk_esq(char *str, const char *stopset); +char *strdup_esq(const char *str); unsigned int hex(char c); char *strreplace_chars(char needle, const char *haystack, const char *replace); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3bbf173ad822..0037f1163919 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -257,7 +257,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) * like in: * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] */ - if (prev->end == prev->start) { + if (prev->end == prev->start && prev->type != STT_NOTYPE) { const char *prev_mod; const char *curr_mod; diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6c45ded922b6..69d8dcf5cf28 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -46,6 +46,10 @@ static const char *const *syscalltbl_native = syscalltbl_mips_n64; #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; static const char *const *syscalltbl_native = syscalltbl_loongarch; +#elif defined(__riscv) +#include <asm/syscalls.c> +const int syscalltbl_native_max_id = SYSCALLTBL_RISCV_MAX_ID; +static const char *const *syscalltbl_native = syscalltbl_riscv; #else const int syscalltbl_native_max_id = 0; static const char *const syscalltbl_native[] = { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index d582cae8e105..2ee2cc30340f 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -17,6 +17,7 @@ struct target { bool default_per_cpu; bool per_thread; bool use_bpf; + bool inherit; int initial_delay; const char *attr_map; }; diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c index ff2b169e0085..6ca0b178fb6c 100644 --- a/tools/perf/util/threads.c +++ b/tools/perf/util/threads.c @@ -141,7 +141,7 @@ void threads__remove_all_threads(struct threads *threads) down_write(&table->lock); __threads_table_entry__set_last_match(table, NULL); - hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) { + hashmap__for_each_entry_safe(&table->shard, cur, tmp, bkt) { struct thread *old_value; hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value); @@ -175,7 +175,7 @@ int threads__for_each_thread(struct threads *threads, size_t bkt; down_read(&table->lock); - hashmap__for_each_entry((&table->shard), cur, bkt) { + hashmap__for_each_entry(&table->shard, cur, bkt) { int rc = fn((struct thread *)cur->pvalue, data); if (rc != 0) { diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c new file mode 100644 index 000000000000..4fb097578479 --- /dev/null +++ b/tools/perf/util/tool_pmu.c @@ -0,0 +1,505 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "cgroup.h" +#include "counts.h" +#include "cputopo.h" +#include "evsel.h" +#include "pmu.h" +#include "print-events.h" +#include "smt.h" +#include "time-utils.h" +#include "tool_pmu.h" +#include "tsc.h" +#include <api/fs/fs.h> +#include <api/io.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <fcntl.h> +#include <strings.h> + +static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { + NULL, + "duration_time", + "user_time", + "system_time", + "has_pmem", + "num_cores", + "num_cpus", + "num_cpus_online", + "num_dies", + "num_packages", + "slots", + "smt_on", + "system_tsc_freq", +}; + +bool tool_pmu__skip_event(const char *name __maybe_unused) +{ +#if !defined(__aarch64__) + /* The slots event should only appear on arm64. */ + if (strcasecmp(name, "slots") == 0) + return true; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + /* The system_tsc_freq event should only appear on x86. */ + if (strcasecmp(name, "system_tsc_freq") == 0) + return true; +#endif + return false; +} + +int tool_pmu__num_skip_events(void) +{ + int num = 0; + +#if !defined(__aarch64__) + num++; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + num++; +#endif + return num; +} + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev) +{ + if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) + return tool_pmu__event_names[ev]; + + return NULL; +} + +enum tool_pmu_event tool_pmu__str_to_event(const char *str) +{ + int i; + + if (tool_pmu__skip_event(str)) + return TOOL_PMU__EVENT_NONE; + + tool_pmu__for_each_event(i) { + if (!strcasecmp(str, tool_pmu__event_names[i])) + return i; + } + return TOOL_PMU__EVENT_NONE; +} + +bool perf_pmu__is_tool(const struct perf_pmu *pmu) +{ + return pmu && pmu->type == PERF_PMU_TYPE_TOOL; +} + +bool evsel__is_tool(const struct evsel *evsel) +{ + return perf_pmu__is_tool(evsel->pmu); +} + +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) +{ + if (!evsel__is_tool(evsel)) + return TOOL_PMU__EVENT_NONE; + + return (enum tool_pmu_event)evsel->core.attr.config; +} + +const char *evsel__tool_pmu_event_name(const struct evsel *evsel) +{ + return tool_pmu__event_to_str(evsel->core.attr.config); +} + +static bool read_until_char(struct io *io, char e) +{ + int c; + + do { + c = io__get_char(io); + if (c == -1) + return false; + } while (c != e); + return true; +} + +static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int i; + + io__init(&io, fd, buf, sizeof(buf)); + + /* Skip lines to relevant CPU. */ + for (i = -1; i < cpu.cpu; i++) { + if (!read_until_char(&io, '\n')) + return -EINVAL; + } + /* Skip to "cpu". */ + if (io__get_char(&io) != 'c') return -EINVAL; + if (io__get_char(&io) != 'p') return -EINVAL; + if (io__get_char(&io) != 'u') return -EINVAL; + + /* Skip N of cpuN. */ + if (!read_until_char(&io, ' ')) + return -EINVAL; + + i = 1; + while (true) { + if (io__get_dec(&io, val) != ' ') + break; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +static int read_pid_stat_field(int fd, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int c, i; + + io__init(&io, fd, buf, sizeof(buf)); + if (io__get_dec(&io, val) != ' ') + return -EINVAL; + if (field == 1) + return 0; + + /* Skip comm. */ + if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) + return -EINVAL; + if (field == 2) + return -EINVAL; /* String can't be returned. */ + + /* Skip state */ + if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) + return -EINVAL; + if (field == 3) + return -EINVAL; /* String can't be returned. */ + + /* Loop over numeric fields*/ + if (io__get_char(&io) != ' ') + return -EINVAL; + + i = 4; + while (true) { + c = io__get_dec(&io, val); + if (c == -1) + return -EINVAL; + if (c == -2) { + /* Assume a -ve was read */ + c = io__get_dec(&io, val); + *val *= -1; + } + if (c != ' ') + return -EINVAL; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads) +{ + if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) && + !evsel->start_times) { + evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), + nthreads, + sizeof(__u64)); + if (!evsel->start_times) + return -ENOMEM; + } + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + enum tool_pmu_event ev = evsel__tool_event(evsel); + int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; + + if (ev == TOOL_PMU__EVENT_NUM_CPUS) + return 0; + + if (ev == TOOL_PMU__EVENT_DURATION_TIME) { + if (evsel->core.attr.sample_period) /* no sampling */ + return -EINVAL; + evsel->start_time = rdclock(); + return 0; + } + + if (evsel->cgrp) + pid = evsel->cgrp->fd; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + if (thread >= nthreads) + break; + + if (!evsel->cgrp && !evsel->core.system_wide) + pid = perf_thread_map__pid(threads, thread); + + if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { + bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; + __u64 *start_time = NULL; + int fd; + + if (evsel->core.attr.sample_period) { + /* no sampling */ + err = -EINVAL; + goto out_close; + } + if (pid > -1) { + char buf[64]; + + snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); + fd = open(buf, O_RDONLY); + evsel->pid_stat = true; + } else { + fd = open("/proc/stat", O_RDONLY); + } + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + start_time = xyarray__entry(evsel->start_times, idx, thread); + if (pid > -1) { + err = read_pid_stat_field(fd, system ? 15 : 14, + start_time); + } else { + struct perf_cpu cpu; + + cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); + err = read_stat_field(fd, cpu, system ? 3 : 1, + start_time); + } + if (err) + goto out_close; + } + + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + old_errno = errno; + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + errno = old_errno; + return err; +} + +#if !defined(__i386__) && !defined(__x86_64__) +u64 arch_get_tsc_freq(void) +{ + return 0; +} +#endif + +#if !defined(__aarch64__) +u64 tool_pmu__cpu_slots_per_cycle(void) +{ + return 0; +} +#endif + +static bool has_pmem(void) +{ + static bool has_pmem, cached; + const char *sysfs = sysfs__mountpoint(); + char path[PATH_MAX]; + + if (!cached) { + snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); + has_pmem = access(path, F_OK) == 0; + cached = true; + } + return has_pmem; +} + +bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) +{ + const struct cpu_topology *topology; + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + *result = has_pmem() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NUM_CORES: + topology = online_topology(); + *result = topology->core_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS: + *result = cpu__max_present_cpu().cpu; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { + struct perf_cpu_map *online = cpu_map__online(); + + if (online) { + *result = perf_cpu_map__nr(online); + return true; + } + return false; + } + case TOOL_PMU__EVENT_NUM_DIES: + topology = online_topology(); + *result = topology->die_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_PACKAGES: + topology = online_topology(); + *result = topology->package_cpus_lists; + return true; + + case TOOL_PMU__EVENT_SLOTS: + *result = tool_pmu__cpu_slots_per_cycle(); + return *result ? true : false; + + case TOOL_PMU__EVENT_SMT_ON: + *result = smt_on() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + *result = arch_get_tsc_freq(); + return true; + + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_DURATION_TIME: + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: + case TOOL_PMU__EVENT_MAX: + default: + return false; + } +} + +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + __u64 *start_time, cur_time, delta_start; + u64 val; + int fd, err = 0; + struct perf_counts_values *count, *old_count = NULL; + bool adjust = false; + enum tool_pmu_event ev = evsel__tool_event(evsel); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + val = 0; + if (cpu_map_idx == 0 && thread == 0) { + if (!tool_pmu__read_event(ev, &val)) { + count->lost++; + val = 0; + } + } + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; + } + return 0; + case TOOL_PMU__EVENT_DURATION_TIME: + /* + * Pretend duration_time is only on the first CPU and thread, or + * else aggregation will scale duration_time by the number of + * CPUs/threads. + */ + start_time = &evsel->start_time; + if (cpu_map_idx == 0 && thread == 0) + cur_time = rdclock(); + else + cur_time = *start_time; + break; + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: { + bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME; + + start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + lseek(fd, SEEK_SET, 0); + if (evsel->pid_stat) { + /* The event exists solely on 1 CPU. */ + if (cpu_map_idx == 0) + err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); + else + cur_time = 0; + } else { + /* The event is for all threads. */ + if (thread == 0) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, + cpu_map_idx); + + err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); + } else { + cur_time = 0; + } + } + adjust = true; + break; + } + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_MAX: + default: + err = -EINVAL; + } + if (err) + return err; + + delta_start = cur_time - *start_time; + if (adjust) { + __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); + + delta_start *= 1000000000 / ticks_per_sec; + } + count->val = delta_start; + count->ena = count->run = delta_start; + count->lost = 0; + return 0; +} + +struct perf_pmu *perf_pmus__tool_pmu(void) +{ + static struct perf_pmu tool = { + .name = "tool", + .type = PERF_PMU_TYPE_TOOL, + .aliases = LIST_HEAD_INIT(tool.aliases), + .caps = LIST_HEAD_INIT(tool.caps), + .format = LIST_HEAD_INIT(tool.format), + }; + if (!tool.events_table) + tool.events_table = find_core_events_table("common", "common"); + + return &tool; +} diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h new file mode 100644 index 000000000000..a60184859080 --- /dev/null +++ b/tools/perf/util/tool_pmu.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOL_PMU_H +#define __TOOL_PMU_H + +#include "pmu.h" + +struct evsel; +struct perf_thread_map; +struct print_callbacks; + +enum tool_pmu_event { + TOOL_PMU__EVENT_NONE = 0, + TOOL_PMU__EVENT_DURATION_TIME, + TOOL_PMU__EVENT_USER_TIME, + TOOL_PMU__EVENT_SYSTEM_TIME, + TOOL_PMU__EVENT_HAS_PMEM, + TOOL_PMU__EVENT_NUM_CORES, + TOOL_PMU__EVENT_NUM_CPUS, + TOOL_PMU__EVENT_NUM_CPUS_ONLINE, + TOOL_PMU__EVENT_NUM_DIES, + TOOL_PMU__EVENT_NUM_PACKAGES, + TOOL_PMU__EVENT_SLOTS, + TOOL_PMU__EVENT_SMT_ON, + TOOL_PMU__EVENT_SYSTEM_TSC_FREQ, + + TOOL_PMU__EVENT_MAX, +}; + +#define tool_pmu__for_each_event(ev) \ + for ((ev) = TOOL_PMU__EVENT_DURATION_TIME; (ev) < TOOL_PMU__EVENT_MAX; ev++) + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev); +enum tool_pmu_event tool_pmu__str_to_event(const char *str); +bool tool_pmu__skip_event(const char *name); +int tool_pmu__num_skip_events(void); + +bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); + +u64 tool_pmu__cpu_slots_per_cycle(void); + +bool perf_pmu__is_tool(const struct perf_pmu *pmu); + +bool evsel__is_tool(const struct evsel *evsel); +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); +const char *evsel__tool_pmu_event_name(const struct evsel *evsel); +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads); +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +struct perf_pmu *perf_pmus__tool_pmu(void); + +#endif /* __TOOL_PMU_H */ diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index f0332bd3a501..41d53e1b43e7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -12,7 +12,7 @@ #include <linux/ctype.h> #include <linux/kernel.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) @@ -116,12 +116,6 @@ void event_format__fprintf(struct tep_event *event, trace_seq_destroy(&s); } -void event_format__print(struct tep_event *event, - int cpu, void *data, int size) -{ - return event_format__fprintf(event, cpu, data, size, stdout); -} - /* * prev_state is of size long, which is 32 bits on 32 bit architectures. * As it needs to have the same bits for both 32 bit and 64 bit architectures diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1162c49b8082..ecbbb93f0185 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/mman.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index bd0000300c77..5596fcda2c10 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -10,7 +10,7 @@ #include <string.h> #include <errno.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "debug.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 8ad75b31e09b..6a8c66c64b70 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -8,7 +8,7 @@ #include <fcntl.h> #include <linux/kernel.h> #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <api/fs/tracing_path.h> #include <api/fs/fs.h> #include "trace-event.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index bbf8b26bc8da..79b939f947dd 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -42,9 +42,6 @@ struct tep_event *trace_event__tp_format_id(int id); void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct tep_event *event, - int cpu, void *data, int size); - int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); @@ -150,7 +147,7 @@ int common_lock_depth(struct scripting_context *context); int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); #if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) -#include <traceevent/event-parse.h> +#include <event-parse.h> static inline bool tep_field_is_relative(unsigned long flags) { diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 2e33a20e1e1b..511a517ce613 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -119,7 +119,7 @@ size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) size_t ret; ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); - ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult); + ret += fprintf(fp, "... Time Multiplier %" PRI_lu64 "\n", tc->time_mult); ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); /* diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 88fd1c4c1cb8..57ce8449647f 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -double arch_get_tsc_freq(void); +u64 arch_get_tsc_freq(void); size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9d55a13787ce..0f031eb80b4c 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void) bool test_attr__enabled; +bool exclude_GH_default; + bool perf_host = true; bool perf_guest = false; void event_attr_init(struct perf_event_attr *attr) { + /* to capture ABI version */ + attr->size = sizeof(*attr); + + if (!exclude_GH_default) + return; + if (!perf_host) attr->exclude_host = 1; if (!perf_guest) attr->exclude_guest = 1; - /* to capture ABI version */ - attr->size = sizeof(*attr); } int mkdir_p(char *path, mode_t mode) @@ -336,91 +342,6 @@ bool perf_event_paranoid_check(int max_level) return perf_event_paranoid() <= max_level; } -static int -fetch_ubuntu_kernel_version(unsigned int *puint) -{ - ssize_t len; - size_t line_len = 0; - char *ptr, *line = NULL; - int version, patchlevel, sublevel, err; - FILE *vsig; - - if (!puint) - return 0; - - vsig = fopen("/proc/version_signature", "r"); - if (!vsig) { - pr_debug("Open /proc/version_signature failed: %s\n", - strerror(errno)); - return -1; - } - - len = getline(&line, &line_len, vsig); - fclose(vsig); - err = -1; - if (len <= 0) { - pr_debug("Reading from /proc/version_signature failed: %s\n", - strerror(errno)); - goto errout; - } - - ptr = strrchr(line, ' '); - if (!ptr) { - pr_debug("Parsing /proc/version_signature failed: %s\n", line); - goto errout; - } - - err = sscanf(ptr + 1, "%d.%d.%d", - &version, &patchlevel, &sublevel); - if (err != 3) { - pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n", - line); - goto errout; - } - - *puint = (version << 16) + (patchlevel << 8) + sublevel; - err = 0; -errout: - free(line); - return err; -} - -int -fetch_kernel_version(unsigned int *puint, char *str, - size_t str_size) -{ - struct utsname utsname; - int version, patchlevel, sublevel, err; - bool int_ver_ready = false; - - if (access("/proc/version_signature", R_OK) == 0) - if (!fetch_ubuntu_kernel_version(puint)) - int_ver_ready = true; - - if (uname(&utsname)) - return -1; - - if (str && str_size) { - strncpy(str, utsname.release, str_size); - str[str_size - 1] = '\0'; - } - - if (!puint || int_ver_ready) - return 0; - - err = sscanf(utsname.release, "%d.%d.%d", - &version, &patchlevel, &sublevel); - - if (err != 3) { - pr_debug("Unable to get kernel version from uname '%s'\n", - utsname.release); - return -1; - } - - *puint = (version << 16) + (patchlevel << 8) + sublevel; - return 0; -} - int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9966c21aaf04..3423778e39a5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -21,6 +21,9 @@ extern const char perf_more_info_string[]; extern const char *input_name; +/* This will control if perf_{host,guest} will set attr.exclude_{host,guest}. */ +extern bool exclude_GH_default; + extern bool perf_host; extern bool perf_guest; @@ -43,14 +46,6 @@ int sysctl__max_stack(void); bool sysctl__nmi_watchdog_enabled(void); -int fetch_kernel_version(unsigned int *puint, - char *str, size_t str_sz); -#define KVER_VERSION(x) (((x) >> 16) & 0xff) -#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff) -#define KVER_SUBLEVEL(x) ((x) & 0xff) -#define KVER_FMT "%d.%d.%d" -#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) - int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT @@ -81,13 +76,6 @@ char *perf_exe(char *buf, int len); #endif #endif -extern bool test_attr__enabled; -void test_attr__ready(void); -void test_attr__init(void); -struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags); - struct perf_debuginfod { const char *urls; bool set; diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 067717bce1d4..a7f7ed01421c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -33,6 +33,9 @@ name as necessary to disambiguate it from others is necessary. Note that option msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute <device> is a perf device from /sys/bus/event_source/devices/<device> eg. cstate_core + On Intel hybrid platforms, instead of one "cpu" perf device there are two, "cpu_core" and "cpu_atom" devices for P and E cores respectively. + Turbostat, in this case, allow user to use "cpu" device and will automatically detect the type of a CPU and translate it to "cpu_core" and "cpu_atom" accordingly. + For a complete example see "ADD PERF COUNTER EXAMPLE #2 (using virtual "cpu" device)". <event> is a perf event for given device from /sys/bus/event_source/devices/<device>/events/<event> eg. c1-residency perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency @@ -187,6 +190,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. .PP +\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt. +.PP \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. .PP \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. @@ -387,6 +392,28 @@ CPU pCPU%c1 CPU%c1 .fi +.SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device) +Here we run on hybrid, Raptor Lake platform. +We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore). +We add a counter showing number of L3 cache misses, using virtual "cpu" device, +labeling it with the column header, "VCMISS". +We add a counter showing number of L3 cache misses, using virtual "cpu_core" device, +labeling it with the column header, "PCMISS". This will fail on ecore cpu12. +We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device, +labeling it with the column header, "ECMISS". This will fail on pcore cpu0. +We display it only once, after the conclusion of 0.1 second sleep. +.nf +sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1 +turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0 +turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12 +0.104630 sec +CPU ECMISS PCMISS VCMISS +- 0x0000000000000000 0x0000000000000000 0x0000000000000000 +0 0x0000000000000000 0x0000000000007951 0x0000000000007796 +12 0x000000000001137a 0x0000000000000000 0x0000000000011392 + +.fi + .SH ADD PMT COUNTER EXAMPLE Here we limit turbostat to showing just the CPU number 0. We add two counters, showing crystal clock count and the DC6 residency. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 089220aaa5c9..58a487c225a7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -31,6 +31,9 @@ ) // end copied section +#define CPUID_LEAF_MODEL_ID 0x1A +#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24 + #define X86_VENDOR_INTEL 0 #include INTEL_FAMILY_HEADER @@ -89,6 +92,9 @@ #define PERF_DEV_NAME_BYTES 32 #define PERF_EVT_NAME_BYTES 32 +#define INTEL_ECORE_TYPE 0x20 +#define INTEL_PCORE_TYPE 0x40 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; @@ -194,6 +200,8 @@ struct msr_counter bic[] = { { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -256,6 +264,8 @@ struct msr_counter bic[] = { #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) #define BIC_Diec6 (1ULL << 58) +#define BIC_SysWatt (1ULL << 59) +#define BIC_Sys_J (1ULL << 60) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) @@ -263,7 +273,7 @@ struct msr_counter bic[] = { #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; @@ -370,7 +380,6 @@ enum gfx_sysfs_idx { }; struct gfx_sysfs_info { - const char *path; FILE *fp; unsigned int val; unsigned long long val_ull; @@ -502,12 +511,15 @@ enum rapl_msrs { RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ + RAPL_PLATFORM_ENERGY_LIMIT = BIT(17), /* 0x64c MSR_PLATFORM_ENERGY_LIMIT */ + RAPL_PLATFORM_ENERGY_STATUS = BIT(18), /* 0x64d MSR_PLATFORM_ENERGY_STATUS */ }; #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) +#define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT) #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) @@ -708,7 +720,7 @@ static const struct platform_features skl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -725,42 +737,44 @@ static const struct platform_features cnl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; +/* Copied from cnl_features, with PC7/PC9 removed */ static const struct platform_features adl_features = { - .has_msr_misc_feature_control = 1, - .has_msr_misc_pwr_mgmt = 1, - .has_nhm_msrs = 1, - .has_config_tdp = 1, - .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, - .cst_limit = CST_LIMIT_HSW, - .has_irtl_msrs = 1, - .has_msr_core_c1_res = 1, - .has_ext_cst_msrs = 1, - .trl_msrs = TRL_BASE, - .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, - .enable_tsc_tweak = 1, + .has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = cnl_features.has_nhm_msrs, + .has_config_tdp = cnl_features.has_config_tdp, + .bclk_freq = cnl_features.bclk_freq, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, + .cst_limit = cnl_features.cst_limit, + .has_irtl_msrs = cnl_features.has_irtl_msrs, + .has_msr_core_c1_res = cnl_features.has_msr_core_c1_res, + .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, + .trl_msrs = cnl_features.trl_msrs, + .tcc_offset_bits = cnl_features.tcc_offset_bits, + .rapl_msrs = cnl_features.rapl_msrs, + .enable_tsc_tweak = cnl_features.enable_tsc_tweak, }; -static const struct platform_features arl_features = { - .has_msr_misc_feature_control = 1, - .has_msr_misc_pwr_mgmt = 1, - .has_nhm_msrs = 1, - .has_config_tdp = 1, - .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC10, - .cst_limit = CST_LIMIT_HSW, - .has_irtl_msrs = 1, - .has_msr_core_c1_res = 1, - .has_ext_cst_msrs = 1, - .trl_msrs = TRL_BASE, - .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, - .enable_tsc_tweak = 1, +/* Copied from adl_features, with PC3/PC8 removed */ +static const struct platform_features lnl_features = { + .has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = adl_features.has_nhm_msrs, + .has_config_tdp = adl_features.has_config_tdp, + .bclk_freq = adl_features.bclk_freq, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, + .cst_limit = adl_features.cst_limit, + .has_irtl_msrs = adl_features.has_irtl_msrs, + .has_msr_core_c1_res = adl_features.has_msr_core_c1_res, + .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, + .trl_msrs = adl_features.trl_msrs, + .tcc_offset_bits = adl_features.tcc_offset_bits, + .rapl_msrs = adl_features.rapl_msrs, + .enable_tsc_tweak = adl_features.enable_tsc_tweak, }; static const struct platform_features skx_features = { @@ -790,7 +804,7 @@ static const struct platform_features icx_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, .has_fixed_rapl_unit = 1, }; @@ -806,7 +820,7 @@ static const struct platform_features spr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features srf_features = { @@ -822,7 +836,7 @@ static const struct platform_features srf_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features grr_features = { @@ -838,7 +852,7 @@ static const struct platform_features grr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features slv_features = { @@ -997,18 +1011,19 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_SAPPHIRERAPIDS_X, &spr_features }, { INTEL_EMERALDRAPIDS_X, &spr_features }, { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_D, &spr_features }, { INTEL_LAKEFIELD, &cnl_features }, { INTEL_ALDERLAKE, &adl_features }, { INTEL_ALDERLAKE_L, &adl_features }, { INTEL_RAPTORLAKE, &adl_features }, { INTEL_RAPTORLAKE_P, &adl_features }, { INTEL_RAPTORLAKE_S, &adl_features }, - { INTEL_METEORLAKE, &cnl_features }, - { INTEL_METEORLAKE_L, &cnl_features }, - { INTEL_ARROWLAKE_H, &arl_features }, - { INTEL_ARROWLAKE_U, &arl_features }, - { INTEL_ARROWLAKE, &arl_features }, - { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_METEORLAKE, &adl_features }, + { INTEL_METEORLAKE_L, &adl_features }, + { INTEL_ARROWLAKE_H, &adl_features }, + { INTEL_ARROWLAKE_U, &adl_features }, + { INTEL_ARROWLAKE, &adl_features }, + { INTEL_LUNARLAKE_M, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1100,6 +1115,7 @@ enum rapl_rci_index { RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, RAPL_RCI_INDEX_CORE_ENERGY = 6, + RAPL_RCI_INDEX_ENERGY_PLATFORM = 7, NUM_RAPL_COUNTERS, }; @@ -1126,6 +1142,7 @@ struct rapl_counter_info_t { struct rapl_counter_info_t *rapl_counter_info_perdomain; unsigned int rapl_counter_info_perdomain_size; +#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0) #define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) struct rapl_counter_arch_info { @@ -1247,6 +1264,19 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .compat_scale = 1.0, .flags = 0, }, + { + .feature_mask = RAPL_PSYS, + .perf_subsys = "power", + .perf_name = "energy-psys", + .msr = MSR_PLATFORM_ENERGY_STATUS, + .msr_mask = 0x00000000FFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, + .bic = BIC_SysWatt | BIC_Sys_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, }; struct rapl_counter { @@ -1674,6 +1704,7 @@ enum { IDX_PP1_ENERGY, IDX_PKG_PERF, IDX_DRAM_PERF, + IDX_PSYS_ENERGY, IDX_COUNT, }; @@ -1718,6 +1749,9 @@ off_t idx_to_offset(int idx) case IDX_DRAM_PERF: offset = MSR_DRAM_PERF_STATUS; break; + case IDX_PSYS_ENERGY: + offset = MSR_PLATFORM_ENERGY_STATUS; + break; default: offset = -1; } @@ -1748,6 +1782,9 @@ int offset_to_idx(off_t offset) case MSR_DRAM_PERF_STATUS: idx = IDX_DRAM_PERF; break; + case MSR_PLATFORM_ENERGY_STATUS: + idx = IDX_PSYS_ENERGY; + break; default: idx = -1; } @@ -1769,6 +1806,8 @@ int idx_valid(int idx) return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; + case IDX_PSYS_ENERGY: + return platform->rapl_msrs & RAPL_PSYS; default: return 0; } @@ -1840,6 +1879,10 @@ struct system_summary { struct pkg_data packages; } average; +struct platform_counters { + struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ +} platform_counters_odd, platform_counters_even; + struct cpu_topology { int physical_package_id; int die_id; @@ -1848,6 +1891,7 @@ struct cpu_topology { int logical_node_id; /* 0-based count within the package */ int physical_core_id; int thread_id; + int type; cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; @@ -2291,7 +2335,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2365,7 +2409,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2496,13 +2540,18 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } ppmt = ppmt->next; } + if (DO_BIC(BIC_SysWatt)) + outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_Sys_J)) + outp += sprintf(outp, "%sSys_J", (printed++ ? delim : "")); + outp += sprintf(outp, "\n"); } @@ -2510,6 +2559,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p { int i; struct msr_counter *mp; + struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); @@ -2581,6 +2631,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); + outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value); outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -2619,6 +2670,9 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir */ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + static int count; + + struct platform_counters *pplat_cnt = NULL; double interval_float, tsc; char *fmt8; int i; @@ -2628,6 +2682,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *delim = "\t"; int printed = 0; + if (t == &average.threads) { + pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even; + ++count; + } + /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -2788,6 +2847,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + const unsigned long value_raw = t->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2799,9 +2860,6 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: - const unsigned long value_raw = t->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; } @@ -2869,6 +2927,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + const unsigned long value_raw = c->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -2880,9 +2940,6 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: - const unsigned long value_raw = c->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; } @@ -3068,6 +3125,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + const unsigned long value_raw = p->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) @@ -3079,14 +3138,18 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data break; case PMT_TYPE_XTAL_TIME: - const unsigned long value_raw = p->pmt_counter[i]; - const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; } } + if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); + if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -3394,6 +3457,11 @@ int delta_cpu(struct thread_data *t, struct core_data *c, return retval; } +void delta_platform(struct platform_counters *new, struct platform_counters *old) +{ + old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; +} + void rapl_counter_clear(struct rapl_counter *c) { c->raw_value = 0; @@ -4123,6 +4191,9 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) { + if (rci->source[idx] == COUNTER_SOURCE_NONE) + return; + rc->raw_value = rci->data[idx]; rc->unit = rci->unit[idx]; rc->scale = rci->scale[idx]; @@ -4130,6 +4201,7 @@ void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) { + struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; @@ -4157,6 +4229,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { case COUNTER_SOURCE_NONE: + rci->data[i] = 0; break; case COUNTER_SOURCE_PERF: @@ -4195,7 +4268,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct } } - BUILD_BUG_ON(NUM_RAPL_COUNTERS != 7); + BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); @@ -4203,6 +4276,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); + write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); return 0; } @@ -5385,6 +5459,9 @@ static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) if (*next == '-') /* no negative cpu numbers */ return 1; + if (*next == '\0' || *next == '\n') + break; + start = strtoul(next, &next, 10); if (start >= CPU_SUBSET_MAXCPUS) @@ -5656,6 +5733,32 @@ int init_thread_id(int cpu) return 0; } +int set_my_cpu_type(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int max_level; + + __cpuid(0, max_level, ebx, ecx, edx); + + if (max_level < CPUID_LEAF_MODEL_ID) + return 0; + + __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); + + return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); +} + +int set_cpu_hybrid_type(int cpu) +{ + if (cpu_migrate(cpu)) + return -1; + + int type = set_my_cpu_type(); + + cpus[cpu].type = type; + return 0; +} + /* * snapshot_proc_interrupts() * @@ -5728,28 +5831,21 @@ int snapshot_proc_interrupts(void) */ int snapshot_graphics(int idx) { - FILE *fp; int retval; + rewind(gfx_info[idx].fp); + switch (idx) { case GFX_rc6: case SAM_mc6: - fp = fopen_or_die(gfx_info[idx].path, "r"); - retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull); + retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); if (retval != 1) err(1, "rc6"); - fclose(fp); return 0; case GFX_MHz: case GFX_ACTMHz: case SAM_MHz: case SAM_ACTMHz: - if (gfx_info[idx].fp == NULL) { - gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); - } else { - rewind(gfx_info[idx].fp); - fflush(gfx_info[idx].fp); - } retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); if (retval != 1) err(1, "MHz"); @@ -6116,6 +6212,7 @@ restart: re_initialize(); goto restart; } + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); @@ -6139,6 +6236,7 @@ restart: re_initialize(); goto restart; } + delta_platform(&platform_counters_even, &platform_counters_odd); compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); flush_output_stdout(); @@ -6442,14 +6540,25 @@ static void probe_intel_uncore_frequency(void) probe_intel_uncore_frequency_legacy(); } +static void set_graphics_fp(char *path, int idx) +{ + if (!access(path, R_OK)) + gfx_info[idx].fp = fopen_or_die(path, "r"); +} + +/* Enlarge this if there are /sys/class/drm/card2 ... */ +#define GFX_MAX_CARDS 2 + static void probe_graphics(void) { + char path[PATH_MAX]; + int i; + /* Xe graphics sysfs knobs */ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { FILE *fp; char buf[8]; bool gt0_is_gt; - int idx; fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); if (!fp) @@ -6468,81 +6577,76 @@ static void probe_graphics(void) else goto next; - idx = gt0_is_gt ? GFX_rc6 : SAM_mc6; - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); - idx = gt0_is_gt ? GFX_MHz : SAM_MHz; - if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); - idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz; - if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); - idx = gt0_is_gt ? SAM_mc6 : GFX_rc6; - if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); - idx = gt0_is_gt ? SAM_MHz : GFX_MHz; - if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); - idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz; - if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz); goto end; } next: /* New i915 graphics sysfs knobs */ - if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { - gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms"; + for (i = 0; i < GFX_MAX_CARDS; i++) { + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); + if (!access(path, R_OK)) + break; + } - if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz"; + if (i == GFX_MAX_CARDS) + goto legacy_i915; - if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); + set_graphics_fp(path, GFX_rc6); - if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK)) - gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); + set_graphics_fp(path, GFX_MHz); - if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK)) - gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); + set_graphics_fp(path, GFX_ACTMHz); - if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK)) - gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz"; + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); + set_graphics_fp(path, SAM_mc6); - goto end; - } + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); + set_graphics_fp(path, SAM_MHz); + + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); + set_graphics_fp(path, SAM_ACTMHz); + goto end; + +legacy_i915: /* Fall back to traditional i915 graphics sysfs knobs */ - if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) - gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); - if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz"; - else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); + if (!gfx_info[GFX_MHz].fp) + set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); - if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz"; - else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); + if (!gfx_info[GFX_ACTMHz].fp) + set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); end: - if (gfx_info[GFX_rc6].path) + if (gfx_info[GFX_rc6].fp) BIC_PRESENT(BIC_GFX_rc6); - if (gfx_info[GFX_MHz].path) + if (gfx_info[GFX_MHz].fp) BIC_PRESENT(BIC_GFXMHz); - if (gfx_info[GFX_ACTMHz].path) + if (gfx_info[GFX_ACTMHz].fp) BIC_PRESENT(BIC_GFXACTMHz); - if (gfx_info[SAM_mc6].path) + if (gfx_info[SAM_mc6].fp) BIC_PRESENT(BIC_SAM_mc6); - if (gfx_info[SAM_MHz].path) + if (gfx_info[SAM_MHz].fp) BIC_PRESENT(BIC_SAMMHz); - if (gfx_info[SAM_ACTMHz].path) + if (gfx_info[SAM_ACTMHz].fp) BIC_PRESENT(BIC_SAMACTMHz); } @@ -6911,8 +7015,8 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; - const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; - const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; + const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; + const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; if (rapl_joules) bic_enabled &= ~bic_watt_bits; @@ -7572,6 +7676,9 @@ void rapl_perf_init(void) domain_visited[next_domain] = 1; + if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) + continue; + struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; /* Check if the counter is enabled and accessible */ @@ -8196,7 +8303,7 @@ void topology_probe(bool startup) set_max_cpu_num(); topo.num_cpus = 0; for_all_proc_cpus(count_cpus); - if (!summary_only && topo.num_cpus > 1) + if (!summary_only) BIC_PRESENT(BIC_CPU); if (debug > 1) @@ -8284,6 +8391,8 @@ void topology_probe(bool startup) for_all_proc_cpus(init_thread_id); + for_all_proc_cpus(set_cpu_hybrid_type); + /* * For online cpus * find max_core_id, max_package_id @@ -8332,7 +8441,7 @@ void topology_probe(bool startup) topo.cores_per_node = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); - if (!summary_only && topo.cores_per_node > 1) + if (!summary_only) BIC_PRESENT(BIC_Core); topo.num_die = topo.max_die_id + 1; @@ -8548,6 +8657,35 @@ void check_perf_access(void) bic_enabled &= ~BIC_IPC; } +bool perf_has_hybrid_devices(void) +{ + /* + * 0: unknown + * 1: has separate perf device for p and e core + * -1: doesn't have separate perf device for p and e core + */ + static int cached; + + if (cached > 0) + return true; + + if (cached < 0) + return false; + + if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { + cached = -1; + return false; + } + + if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { + cached = -1; + return false; + } + + cached = 1; + return true; +} + int added_perf_counters_init_(struct perf_counter_info *pinfo) { size_t num_domains = 0; @@ -8604,29 +8742,56 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (domain_visited[next_domain]) continue; - perf_type = read_perf_type(pinfo->device); + /* + * Intel hybrid platforms expose different perf devices for P and E cores. + * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are + * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". + * + * This makes it more complicated to the user, because most of the counters + * are available on both and have to be handled manually, otherwise. + * + * Code below, allow user to use the old "cpu" name, which is translated accordingly. + */ + const char *perf_device = pinfo->device; + + if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { + switch (cpus[cpu].type) { + case INTEL_PCORE_TYPE: + perf_device = "cpu_core"; + break; + + case INTEL_ECORE_TYPE: + perf_device = "cpu_atom"; + break; + + default: /* Don't change, we will probably fail and report a problem soon. */ + break; + } + } + + perf_type = read_perf_type(perf_device); if (perf_type == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "type"); + __func__, perf_device, pinfo->event, "type"); continue; } - perf_config = read_perf_config(pinfo->device, pinfo->event); + perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "config"); + __func__, perf_device, pinfo->event, "config"); continue; } /* Scale is not required, some counters just don't have it. */ - perf_scale = read_perf_scale(pinfo->device, pinfo->event); + perf_scale = read_perf_scale(perf_device, pinfo->event); if (perf_scale == 0.0) perf_scale = 1.0; fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, pinfo->device, pinfo->event, cpu); + __func__, perf_device, pinfo->event, cpu); continue; } @@ -8636,7 +8801,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (debug) fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -9071,7 +9236,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.07.26 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -9781,7 +9946,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; @@ -9805,6 +9970,12 @@ void cmdline(int argc, char **argv) break; case 'D': dump_only++; + /* + * Force the no_perf early to prevent using it as a source. + * User asks for raw values, but perf returns them relative + * to the opening of the file descriptor. + */ + no_perf = 1; break; case 'e': /* --enable specified counter */ diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 2f36b7b6418d..625f5b046776 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -40,9 +40,9 @@ void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_fl void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; -bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym; -void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; -void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; +bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak; +void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak; +void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak; bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; u32 scx_bpf_reenqueue_local(void) __ksym; diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c index 21deea320bd7..e938156ed0a0 100644 --- a/tools/sched_ext/scx_central.c +++ b/tools/sched_ext/scx_central.c @@ -97,7 +97,7 @@ restart: SCX_BUG_ON(!cpuset, "Failed to allocate cpuset"); CPU_ZERO(cpuset); CPU_SET(skel->rodata->central_cpu, cpuset); - SCX_BUG_ON(sched_setaffinity(0, sizeof(cpuset), cpuset), + SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset), "Failed to affinitize to central CPU %d (max %d)", skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1); CPU_FREE(cpuset); diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch index f6a50f06dfc4..eabfe9f411d9 100644 --- a/tools/scripts/Makefile.arch +++ b/tools/scripts/Makefile.arch @@ -7,8 +7,8 @@ HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/riscv.*/riscv/ -e s/loongarch.*/loongarch/) -ifndef ARCH -ARCH := $(HOSTARCH) +ifeq ($(strip $(ARCH)),) +override ARCH := $(HOSTARCH) endif SRCARCH := $(ARCH) diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index 077e6883921d..f088792a8925 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -4,4 +4,4 @@ #include "cxl.h" /* Exporting of cxl_core symbols that are only used by cxl_test */ -EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL"); diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 050725afa45d..d0337c11f9ee 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1531,5 +1531,5 @@ MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS(ACPI); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("ACPI"); +MODULE_IMPORT_NS("CXL"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 71916e0e1546..347c1e7b37bd 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1679,4 +1679,4 @@ static struct platform_driver cxl_mock_mem_driver = { module_platform_driver(cxl_mock_mem_driver); MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("CXL"); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index f4ce96cc11d4..450c7566c33f 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -76,7 +76,7 @@ int __wrap_acpi_table_parse_cedt(enum acpi_cedt_type id, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, ACPI); +EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, "ACPI"); acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, @@ -147,7 +147,7 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL"); int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) { @@ -162,7 +162,7 @@ int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL"); int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) @@ -179,7 +179,7 @@ int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL"); int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) { @@ -194,7 +194,7 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL"); int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) { @@ -209,7 +209,7 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL"); int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, @@ -226,7 +226,7 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) @@ -242,7 +242,7 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL"); struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, @@ -266,7 +266,7 @@ struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, return dport; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, "CXL"); resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, struct cxl_dport *dport) @@ -283,7 +283,7 @@ resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, return component_reg_phys; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, "CXL"); void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) { @@ -297,7 +297,7 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) cxl_endpoint_parse_cdat(port); put_cxl_mock_ops(index); } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, "CXL"); void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { @@ -309,8 +309,8 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device put_cxl_mock_ops(index); } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL"); MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS(ACPI); -MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS("ACPI"); +MODULE_IMPORT_NS("CXL"); diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index bc74088c458a..676fa99a8b19 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -23,7 +23,7 @@ from typing import Iterable, List, Optional, Sequence, Tuple import kunit_json import kunit_kernel import kunit_parser -from kunit_printer import stdout +from kunit_printer import stdout, null_printer class KunitStatus(Enum): SUCCESS = auto() @@ -49,6 +49,8 @@ class KunitBuildRequest(KunitConfigRequest): class KunitParseRequest: raw_output: Optional[str] json: Optional[str] + summary: bool + failed: bool @dataclass class KunitExecRequest(KunitParseRequest): @@ -235,11 +237,18 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input parse_time = time.time() - parse_start return KunitResult(KunitStatus.SUCCESS, parse_time), fake_test + default_printer = stdout + if request.summary or request.failed: + default_printer = null_printer # Actually parse the test results. - test = kunit_parser.parse_run_tests(input_data) + test = kunit_parser.parse_run_tests(input_data, default_printer) parse_time = time.time() - parse_start + if request.failed: + kunit_parser.print_test(test, request.failed, stdout) + kunit_parser.print_summary_line(test, stdout) + if request.json: json_str = kunit_json.get_json_result( test=test, @@ -413,6 +422,14 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None: help='Prints parsed test results as JSON to stdout or a file if ' 'a filename is specified. Does nothing if --raw_output is set.', type=str, const='stdout', default=None, metavar='FILE') + parser.add_argument('--summary', + help='Prints only the summary line for parsed test results.' + 'Does nothing if --raw_output is set.', + action='store_true') + parser.add_argument('--failed', + help='Prints only the failed parsed test results and summary line.' + 'Does nothing if --raw_output is set.', + action='store_true') def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree: @@ -448,6 +465,8 @@ def run_handler(cli_args: argparse.Namespace) -> None: jobs=cli_args.jobs, raw_output=cli_args.raw_output, json=cli_args.json, + summary=cli_args.summary, + failed=cli_args.failed, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, filter=cli_args.filter, @@ -495,6 +514,8 @@ def exec_handler(cli_args: argparse.Namespace) -> None: exec_request = KunitExecRequest(raw_output=cli_args.raw_output, build_dir=cli_args.build_dir, json=cli_args.json, + summary=cli_args.summary, + failed=cli_args.failed, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, filter=cli_args.filter, @@ -520,7 +541,8 @@ def parse_handler(cli_args: argparse.Namespace) -> None: # We know nothing about how the result was created! metadata = kunit_json.Metadata() request = KunitParseRequest(raw_output=cli_args.raw_output, - json=cli_args.json) + json=cli_args.json, summary=cli_args.summary, + failed=cli_args.failed) result, _ = parse_tests(request, metadata, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 61931c4926fd..e76d7894b6c5 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -105,7 +105,9 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._kconfig = qemu_arch_params.kconfig self._qemu_arch = qemu_arch_params.qemu_arch self._kernel_path = qemu_arch_params.kernel_path - self._kernel_command_line = qemu_arch_params.kernel_command_line + ' kunit_shutdown=reboot' + self._kernel_command_line = qemu_arch_params.kernel_command_line + if 'kunit_shutdown=' not in self._kernel_command_line: + self._kernel_command_line += ' kunit_shutdown=reboot' self._extra_qemu_params = qemu_arch_params.extra_qemu_params self._serial = qemu_arch_params.serial diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index ce34be15c929..29fc27e8949b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -17,7 +17,7 @@ import textwrap from enum import Enum, auto from typing import Iterable, Iterator, List, Optional, Tuple -from kunit_printer import stdout +from kunit_printer import Printer, stdout class Test: """ @@ -54,10 +54,10 @@ class Test: """Returns string representation of a Test class object.""" return str(self) - def add_error(self, error_message: str) -> None: + def add_error(self, printer: Printer, error_message: str) -> None: """Records an error that occurred while parsing this test.""" self.counts.errors += 1 - stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') + printer.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') def ok_status(self) -> bool: """Returns true if the status was ok, i.e. passed or skipped.""" @@ -251,7 +251,7 @@ KTAP_VERSIONS = [1] TAP_VERSIONS = [13, 14] def check_version(version_num: int, accepted_versions: List[int], - version_type: str, test: Test) -> None: + version_type: str, test: Test, printer: Printer) -> None: """ Adds error to test object if version number is too high or too low. @@ -263,13 +263,14 @@ def check_version(version_num: int, accepted_versions: List[int], version_type - 'KTAP' or 'TAP' depending on the type of version line. test - Test object for current test being parsed + printer - Printer object to output error """ if version_num < min(accepted_versions): - test.add_error(f'{version_type} version lower than expected!') + test.add_error(printer, f'{version_type} version lower than expected!') elif version_num > max(accepted_versions): - test.add_error(f'{version_type} version higer than expected!') + test.add_error(printer, f'{version_type} version higer than expected!') -def parse_ktap_header(lines: LineStream, test: Test) -> bool: +def parse_ktap_header(lines: LineStream, test: Test, printer: Printer) -> bool: """ Parses KTAP/TAP header line and checks version number. Returns False if fails to parse KTAP/TAP header line. @@ -281,6 +282,7 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: Parameters: lines - LineStream of KTAP output to parse test - Test object for current test being parsed + printer - Printer object to output results Return: True if successfully parsed KTAP/TAP header line @@ -289,10 +291,10 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: tap_match = TAP_START.match(lines.peek()) if ktap_match: version_num = int(ktap_match.group(1)) - check_version(version_num, KTAP_VERSIONS, 'KTAP', test) + check_version(version_num, KTAP_VERSIONS, 'KTAP', test, printer) elif tap_match: version_num = int(tap_match.group(1)) - check_version(version_num, TAP_VERSIONS, 'TAP', test) + check_version(version_num, TAP_VERSIONS, 'TAP', test, printer) else: return False lines.pop() @@ -380,7 +382,7 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: return name == test.name def parse_test_result(lines: LineStream, test: Test, - expected_num: int) -> bool: + expected_num: int, printer: Printer) -> bool: """ Parses test result line and stores the status and name in the test object. Reports an error if the test number does not match expected @@ -398,6 +400,7 @@ def parse_test_result(lines: LineStream, test: Test, lines - LineStream of KTAP output to parse test - Test object for current test being parsed expected_num - expected test number for current test + printer - Printer object to output results Return: True if successfully parsed a test result line. @@ -420,7 +423,7 @@ def parse_test_result(lines: LineStream, test: Test, # Check test num num = int(match.group(2)) if num != expected_num: - test.add_error(f'Expected test number {expected_num} but found {num}') + test.add_error(printer, f'Expected test number {expected_num} but found {num}') # Set status of test object status = match.group(1) @@ -486,7 +489,7 @@ def format_test_divider(message: str, len_message: int) -> str: len_2 = difference - len_1 return ('=' * len_1) + f' {message} ' + ('=' * len_2) -def print_test_header(test: Test) -> None: +def print_test_header(test: Test, printer: Printer) -> None: """ Prints test header with test name and optionally the expected number of subtests. @@ -496,6 +499,7 @@ def print_test_header(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object to output results """ message = test.name if message != "": @@ -507,15 +511,15 @@ def print_test_header(test: Test) -> None: message += '(1 subtest)' else: message += f'({test.expected_count} subtests)' - stdout.print_with_timestamp(format_test_divider(message, len(message))) + printer.print_with_timestamp(format_test_divider(message, len(message))) -def print_log(log: Iterable[str]) -> None: +def print_log(log: Iterable[str], printer: Printer) -> None: """Prints all strings in saved log for test in yellow.""" formatted = textwrap.dedent('\n'.join(log)) for line in formatted.splitlines(): - stdout.print_with_timestamp(stdout.yellow(line)) + printer.print_with_timestamp(printer.yellow(line)) -def format_test_result(test: Test) -> str: +def format_test_result(test: Test, printer: Printer) -> str: """ Returns string with formatted test result with colored status and test name. @@ -525,23 +529,24 @@ def format_test_result(test: Test) -> str: Parameters: test - Test object representing current test being printed + printer - Printer object to output results Return: String containing formatted test result """ if test.status == TestStatus.SUCCESS: - return stdout.green('[PASSED] ') + test.name + return printer.green('[PASSED] ') + test.name if test.status == TestStatus.SKIPPED: - return stdout.yellow('[SKIPPED] ') + test.name + return printer.yellow('[SKIPPED] ') + test.name if test.status == TestStatus.NO_TESTS: - return stdout.yellow('[NO TESTS RUN] ') + test.name + return printer.yellow('[NO TESTS RUN] ') + test.name if test.status == TestStatus.TEST_CRASHED: - print_log(test.log) + print_log(test.log, printer) return stdout.red('[CRASHED] ') + test.name - print_log(test.log) - return stdout.red('[FAILED] ') + test.name + print_log(test.log, printer) + return printer.red('[FAILED] ') + test.name -def print_test_result(test: Test) -> None: +def print_test_result(test: Test, printer: Printer) -> None: """ Prints result line with status of test. @@ -550,10 +555,11 @@ def print_test_result(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object """ - stdout.print_with_timestamp(format_test_result(test)) + printer.print_with_timestamp(format_test_result(test, printer)) -def print_test_footer(test: Test) -> None: +def print_test_footer(test: Test, printer: Printer) -> None: """ Prints test footer with status of test. @@ -562,12 +568,38 @@ def print_test_footer(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object to output results """ - message = format_test_result(test) - stdout.print_with_timestamp(format_test_divider(message, - len(message) - stdout.color_len())) + message = format_test_result(test, printer) + printer.print_with_timestamp(format_test_divider(message, + len(message) - printer.color_len())) +def print_test(test: Test, failed_only: bool, printer: Printer) -> None: + """ + Prints Test object to given printer. For a child test, the result line is + printed. For a parent test, the test header, all child test results, and + the test footer are all printed. If failed_only is true, only failed/crashed + tests will be printed. + Parameters: + test - Test object to print + failed_only - True if only failed/crashed tests should be printed. + printer - Printer object to output results + """ + if test.name == "main": + printer.print_with_timestamp(DIVIDER) + for subtest in test.subtests: + print_test(subtest, failed_only, printer) + printer.print_with_timestamp(DIVIDER) + elif test.subtests != []: + if not failed_only or not test.ok_status(): + print_test_header(test, printer) + for subtest in test.subtests: + print_test(subtest, failed_only, printer) + print_test_footer(test, printer) + else: + if not failed_only or not test.ok_status(): + print_test_result(test, printer) def _summarize_failed_tests(test: Test) -> str: """Tries to summarize all the failing subtests in `test`.""" @@ -601,7 +633,7 @@ def _summarize_failed_tests(test: Test) -> str: return 'Failures: ' + ', '.join(failures) -def print_summary_line(test: Test) -> None: +def print_summary_line(test: Test, printer: Printer) -> None: """ Prints summary line of test object. Color of line is dependent on status of test. Color is green if test passes, yellow if test is @@ -614,6 +646,7 @@ def print_summary_line(test: Test) -> None: Errors: 0" test - Test object representing current test being printed + printer - Printer object to output results """ if test.status == TestStatus.SUCCESS: color = stdout.green @@ -621,7 +654,7 @@ def print_summary_line(test: Test) -> None: color = stdout.yellow else: color = stdout.red - stdout.print_with_timestamp(color(f'Testing complete. {test.counts}')) + printer.print_with_timestamp(color(f'Testing complete. {test.counts}')) # Summarize failures that might have gone off-screen since we had a lot # of tests (arbitrarily defined as >=100 for now). @@ -630,7 +663,7 @@ def print_summary_line(test: Test) -> None: summarized = _summarize_failed_tests(test) if not summarized: return - stdout.print_with_timestamp(color(summarized)) + printer.print_with_timestamp(color(summarized)) # Other methods: @@ -654,7 +687,7 @@ def bubble_up_test_results(test: Test) -> None: elif test.counts.get_status() == TestStatus.TEST_CRASHED: test.status = TestStatus.TEST_CRASHED -def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool) -> Test: +def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool, printer: Printer) -> Test: """ Finds next test to parse in LineStream, creates new Test object, parses any subtests of the test, populates Test object with all @@ -710,6 +743,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: log - list of strings containing any preceding diagnostic lines corresponding to the current test is_subtest - boolean indicating whether test is a subtest + printer - Printer object to output results Return: Test object populated with characteristics and any subtests @@ -725,14 +759,14 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" - ktap_line = parse_ktap_header(lines, test) + ktap_line = parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True else: # If not the main test, attempt to parse a test header containing # the KTAP version line and/or subtest header line - ktap_line = parse_ktap_header(lines, test) + ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: @@ -740,7 +774,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # to parse test plan and print test header test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) - print_test_header(test) + print_test_header(test, printer) expected_count = test.expected_count subtests = [] test_num = 1 @@ -758,16 +792,16 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parser reaches end of test before # parsing expected number of subtests, print # crashed subtest and record error - test.add_error('missing expected subtest!') + test.add_error(printer, 'missing expected subtest!') sub_test.log.extend(sub_log) test.counts.add_status( TestStatus.TEST_CRASHED) - print_test_result(sub_test) + print_test_result(sub_test, printer) else: test.log.extend(sub_log) break else: - sub_test = parse_test(lines, test_num, sub_log, True) + sub_test = parse_test(lines, test_num, sub_log, True, printer) subtests.append(sub_test) test_num += 1 test.subtests = subtests @@ -775,51 +809,51 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If not main test, look for test result line test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): - test.add_error('missing subtest result line!') + test.add_error(printer, 'missing subtest result line!') else: - parse_test_result(lines, test, expected_num) + parse_test_result(lines, test, expected_num, printer) # Check for there being no subtests within parent test if parent_test and len(subtests) == 0: # Don't override a bad status if this test had one reported. # Assumption: no subtests means CRASHED is from Test.__init__() if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS): - print_log(test.log) + print_log(test.log, printer) test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') + test.add_error(printer, '0 tests run!') # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) if parent_test and is_subtest: # If test has subtests and is not the main test object, print # footer. - print_test_footer(test) + print_test_footer(test, printer) elif is_subtest: - print_test_result(test) + print_test_result(test, printer) return test -def parse_run_tests(kernel_output: Iterable[str]) -> Test: +def parse_run_tests(kernel_output: Iterable[str], printer: Printer) -> Test: """ Using kernel output, extract KTAP lines, parse the lines for test results and print condensed test results and summary line. Parameters: kernel_output - Iterable object contains lines of kernel output + printer - Printer object to output results Return: Test - the main test object with all subtests. """ - stdout.print_with_timestamp(DIVIDER) + printer.print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) test = Test() if not lines: test.name = '<missing>' - test.add_error('Could not find any KTAP output. Did any KUnit tests run?') + test.add_error(printer, 'Could not find any KTAP output. Did any KUnit tests run?') test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: - test = parse_test(lines, 0, [], False) + test = parse_test(lines, 0, [], False, printer) if test.status != TestStatus.NO_TESTS: test.status = test.counts.get_status() - stdout.print_with_timestamp(DIVIDER) - print_summary_line(test) + printer.print_with_timestamp(DIVIDER) return test diff --git a/tools/testing/kunit/kunit_printer.py b/tools/testing/kunit/kunit_printer.py index 015adf87dc2c..ca119f61fe79 100644 --- a/tools/testing/kunit/kunit_printer.py +++ b/tools/testing/kunit/kunit_printer.py @@ -15,12 +15,17 @@ _RESET = '\033[0;0m' class Printer: """Wraps a file object, providing utilities for coloring output, etc.""" - def __init__(self, output: typing.IO[str]): + def __init__(self, print: bool=True, output: typing.IO[str]=sys.stdout): self._output = output - self._use_color = output.isatty() + self._print = print + if print: + self._use_color = output.isatty() + else: + self._use_color = False def print(self, message: str) -> None: - print(message, file=self._output) + if self._print: + print(message, file=self._output) def print_with_timestamp(self, message: str) -> None: ts = datetime.datetime.now().strftime('%H:%M:%S') @@ -45,4 +50,5 @@ class Printer: return len(self.red('')) # Provides a default instance that prints to stdout -stdout = Printer(sys.stdout) +stdout = Printer() +null_printer = Printer(print=False) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 2beb7327e53f..0bcb0cc002f8 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -23,6 +23,7 @@ import kunit_parser import kunit_kernel import kunit_json import kunit +from kunit_printer import stdout test_tmpdir = '' abs_test_data_dir = '' @@ -139,28 +140,28 @@ class KUnitParserTest(unittest.TestCase): def test_parse_successful_test_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed.log') with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_parse_successful_nested_tests_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_kselftest_nested(self): kselftest_log = test_data_path('test_is_test_passed-kselftest.log') with open(kselftest_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.assertEqual(result.counts.errors, 0) @@ -168,7 +169,7 @@ class KUnitParserTest(unittest.TestCase): empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests)) self.assertEqual(kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) self.assertEqual(result.counts.errors, 1) @@ -179,7 +180,7 @@ class KUnitParserTest(unittest.TestCase): with open(missing_plan_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines( - file.readlines())) + file.readlines()), stdout) # A missing test plan is not an error. self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0)) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -188,7 +189,7 @@ class KUnitParserTest(unittest.TestCase): header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') with open(header_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests)) self.assertEqual(kunit_parser.TestStatus.NO_TESTS, result.status) self.assertEqual(result.counts.errors, 1) @@ -197,7 +198,7 @@ class KUnitParserTest(unittest.TestCase): no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') with open(no_plan_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests[0].subtests[0].subtests)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, @@ -210,7 +211,7 @@ class KUnitParserTest(unittest.TestCase): print_mock = mock.patch('kunit_printer.Printer.print').start() with open(crash_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) print_mock.stop() self.assertEqual(0, len(result.subtests)) @@ -219,7 +220,7 @@ class KUnitParserTest(unittest.TestCase): def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # A skipped test does not fail the whole suite. self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -228,7 +229,7 @@ class KUnitParserTest(unittest.TestCase): def test_skipped_all_tests(self): skipped_log = test_data_path('test_skip_all_tests.log') with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SKIPPED, result.status) self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5)) @@ -236,7 +237,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_hyphen(self): hyphen_log = test_data_path('test_strip_hyphen.log') with open(hyphen_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # A skipped test does not fail the whole suite. self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -250,7 +251,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_prefix_printk_time(self): prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -258,7 +259,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -266,7 +267,7 @@ class KUnitParserTest(unittest.TestCase): def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -274,7 +275,7 @@ class KUnitParserTest(unittest.TestCase): def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -282,7 +283,7 @@ class KUnitParserTest(unittest.TestCase): def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertGreaterEqual(result.counts.errors, 1) @@ -290,7 +291,7 @@ class KUnitParserTest(unittest.TestCase): def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -310,7 +311,7 @@ class KUnitParserTest(unittest.TestCase): not ok 2 - test2 not ok 1 - some_failed_suite """ - result = kunit_parser.parse_run_tests(output.splitlines()) + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.assertEqual(kunit_parser._summarize_failed_tests(result), @@ -319,7 +320,7 @@ class KUnitParserTest(unittest.TestCase): def test_ktap_format(self): ktap_log = test_data_path('test_parse_ktap_output.log') with open(ktap_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3)) self.assertEqual('suite', result.subtests[0].name) self.assertEqual('case_1', result.subtests[0].subtests[0].name) @@ -328,13 +329,13 @@ class KUnitParserTest(unittest.TestCase): def test_parse_subtest_header(self): ktap_log = test_data_path('test_parse_subtest_header.log') with open(ktap_log) as file: - kunit_parser.parse_run_tests(file.readlines()) + kunit_parser.parse_run_tests(file.readlines(), stdout) self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) def test_parse_attributes(self): ktap_log = test_data_path('test_parse_attributes.log') with open(ktap_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # Test should pass with no errors self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=0)) @@ -355,7 +356,7 @@ class KUnitParserTest(unittest.TestCase): Indented more. not ok 1 test1 """ - result = kunit_parser.parse_run_tests(output.splitlines()) + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.print_mock.assert_any_call(StrContains('Test output.')) @@ -544,7 +545,7 @@ class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): with open(test_data_path(log_file)) as file: - test_result = kunit_parser.parse_run_tests(file) + test_result = kunit_parser.parse_run_tests(file, stdout) json_obj = kunit_json.get_json_result( test=test_result, metadata=kunit_json.Metadata()) @@ -810,7 +811,7 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) self.assertEqual(got, want) # Should respect the user's filter glob when listing tests. self.linux_source_mock.run_kernel.assert_called_once_with( @@ -823,7 +824,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', filter='', filter_action=None, timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', filter='', filter_action=None, timeout=300), @@ -836,7 +837,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', filter='', filter_action=None, timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', filter='', filter_action=None, timeout=300), diff --git a/tools/testing/kunit/qemu_configs/loongarch.py b/tools/testing/kunit/qemu_configs/loongarch.py new file mode 100644 index 000000000000..a92422967d1d --- /dev/null +++ b/tools/testing/kunit/qemu_configs/loongarch.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='loongarch', + kconfig=''' +CONFIG_EFI_STUB=n +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PVPANIC=y +CONFIG_PVPANIC_PCI=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +''', + qemu_arch='loongarch64', + kernel_path='arch/loongarch/boot/vmlinux.elf', + kernel_command_line='console=ttyS0 kunit_shutdown=poweroff', + extra_qemu_params=[ + '-machine', 'virt', + '-device', 'pvpanic-pci', + '-cpu', 'max',]) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 892e990c034a..68a064ce598c 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -883,7 +883,7 @@ static const struct platform_device_id ndtest_id[] = { static struct platform_driver ndtest_driver = { .probe = ndtest_probe, - .remove_new = ndtest_remove, + .remove = ndtest_remove, .driver = { .name = KBUILD_MODNAME, }, diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 551ae6898c1d..bc30050227fd 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -462,6 +462,28 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); mas_destroy(&mas); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + mas.status = ma_start; + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2); + mas_destroy(&mas); + + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + mas.status = ma_start; + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2); + mas_destroy(&mas); + mtree_unlock(mt); } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 3f06b6233ae4..2401e973c359 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -72,6 +72,7 @@ TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nsfs +TARGETS += pcie_bwctrl TARGETS += perf_events TARGETS += pidfd TARGETS += pid_namespace diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore index 12dc3fcd3456..3dd8e1176b89 100644 --- a/tools/testing/selftests/alsa/.gitignore +++ b/tools/testing/selftests/alsa/.gitignore @@ -1,3 +1,5 @@ +global-timer mixer-test pcm-test test-pcmtest-driver +utimer-test diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 944279160fed..8dab90ad22bb 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -27,5 +27,5 @@ include ../lib.mk $(OUTPUT)/libatest.so: conf.c alsa-local.h $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@ -$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h +$(OUTPUT)/%: %.c $(OUTPUT)/libatest.so alsa-local.h $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@ diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 0029ed9c5c9a..35f521e5f41c 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -46,6 +46,12 @@ static void atomics_sigill(void) asm volatile(".inst 0xb82003ff" : : : ); } +static void cmpbr_sigill(void) +{ + /* Not implemented, too complicated and unreliable anyway */ +} + + static void crc32_sigill(void) { /* CRC32W W0, W0, W1 */ @@ -82,6 +88,18 @@ static void f8fma_sigill(void) asm volatile(".inst 0xec0fc00"); } +static void f8mm4_sigill(void) +{ + /* FMMLA V0.4SH, V0.16B, V0.16B */ + asm volatile(".inst 0x6e00ec00"); +} + +static void f8mm8_sigill(void) +{ + /* FMMLA V0.4S, V0.16B, V0.16B */ + asm volatile(".inst 0x6e80ec00"); +} + static void faminmax_sigill(void) { /* FAMIN V0.4H, V0.4H, V0.4H */ @@ -98,6 +116,12 @@ static void fpmr_sigill(void) asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0"); } +static void fprcvt_sigill(void) +{ + /* FCVTAS S0, H0 */ + asm volatile(".inst 0x1efa0000"); +} + static void gcs_sigill(void) { unsigned long *gcspr; @@ -226,6 +250,42 @@ static void sme2p1_sigill(void) asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); } +static void sme2p2_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* UXTB Z0.D, P0/Z, Z0.D */ + asm volatile(".inst 0x4c1a000" : : : ); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void sme_aes_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* AESD z0.b, z0.b, z0.b */ + asm volatile(".inst 0x4522e400" : : : "z0"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void sme_sbitperm_sigill(void) +{ + /* SMSTART SM */ + asm volatile("msr S0_3_C4_C3_3, xzr" : : : ); + + /* BDEP Z0.B, Z0.B, Z0.B */ + asm volatile(".inst 0x4500b400" : : : "z0"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + static void smei16i32_sigill(void) { /* SMSTART */ @@ -339,8 +399,44 @@ static void smesf8fma_sigill(void) /* SMSTART */ asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); - /* FMLALB V0.8H, V0.16B, V0.16B */ - asm volatile(".inst 0xec0fc00"); + /* FMLALB Z0.8H, Z0.B, Z0.B */ + asm volatile(".inst 0x64205000"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesfexpa_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* FEXPA Z0.D, Z0.D */ + asm volatile(".inst 0x04e0b800"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smesmop4_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* SMOP4A ZA0.S, Z0.B, { Z0.B - Z1.B } */ + asm volatile(".inst 0x80108000"); + + /* SMSTOP */ + asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); +} + +static void smestmop_sigill(void) +{ + /* SMSTART */ + asm volatile("msr S0_3_C4_C7_3, xzr" : : : ); + + /* STMOPA ZA0.S, { Z0.H - Z1.H }, Z0.H, Z20[0] */ + asm volatile(".inst 0x80408008"); /* SMSTOP */ asm volatile("msr S0_3_C4_C6_3, xzr" : : : ); @@ -364,18 +460,42 @@ static void sve2p1_sigill(void) asm volatile(".inst 0x65000000" : : : "z0"); } +static void sve2p2_sigill(void) +{ + /* NOT Z0.D, P0/Z, Z0.D */ + asm volatile(".inst 0x4cea000" : : : "z0"); +} + static void sveaes_sigill(void) { /* AESD z0.b, z0.b, z0.b */ asm volatile(".inst 0x4522e400" : : : "z0"); } +static void sveaes2_sigill(void) +{ + /* AESD {Z0.B - Z1.B }, { Z0.B - Z1.B }, Z0.Q */ + asm volatile(".inst 0x4522ec00" : : : "z0"); +} + static void sveb16b16_sigill(void) { /* BFADD Z0.H, Z0.H, Z0.H */ asm volatile(".inst 0x65000000" : : : ); } +static void svebfscale_sigill(void) +{ + /* BFSCALE Z0.H, P0/M, Z0.H, Z0.H */ + asm volatile(".inst 0x65098000" : : : "z0"); +} + +static void svef16mm_sigill(void) +{ + /* FMMLA Z0.S, Z0.H, Z0.H */ + asm volatile(".inst 0x6420e400"); +} + static void svepmull_sigill(void) { /* PMULLB Z0.Q, Z0.D, Z0.D */ @@ -394,6 +514,12 @@ static void svesha3_sigill(void) asm volatile(".inst 0x4203800" : : : "z0"); } +static void sveeltperm_sigill(void) +{ + /* COMPACT Z0.B, P0, Z0.B */ + asm volatile(".inst 0x5218000" : : : "x0"); +} + static void svesm4_sigill(void) { /* SM4E Z0.S, Z0.S, Z0.S */ @@ -470,6 +596,13 @@ static const struct hwcap_data { .sigill_fn = aes_sigill, }, { + .name = "CMPBR", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_CMPBR, + .cpuinfo = "cmpbr", + .sigill_fn = cmpbr_sigill, + }, + { .name = "CRC32", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_CRC32, @@ -524,6 +657,20 @@ static const struct hwcap_data { .sigill_fn = f8fma_sigill, }, { + .name = "F8MM8", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_F8MM8, + .cpuinfo = "f8mm8", + .sigill_fn = f8mm8_sigill, + }, + { + .name = "F8MM4", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_F8MM4, + .cpuinfo = "f8mm4", + .sigill_fn = f8mm4_sigill, + }, + { .name = "FAMINMAX", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_FAMINMAX, @@ -546,6 +693,13 @@ static const struct hwcap_data { .sigill_reliable = true, }, { + .name = "FPRCVT", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_FPRCVT, + .cpuinfo = "fprcvt", + .sigill_fn = fprcvt_sigill, + }, + { .name = "GCS", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_GCS, @@ -692,6 +846,20 @@ static const struct hwcap_data { .sigill_fn = sme2p1_sigill, }, { + .name = "SME 2.2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME2P2, + .cpuinfo = "sme2p2", + .sigill_fn = sme2p2_sigill, + }, + { + .name = "SME AES", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_AES, + .cpuinfo = "smeaes", + .sigill_fn = sme_aes_sigill, + }, + { .name = "SME I16I32", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME_I16I32, @@ -741,6 +909,13 @@ static const struct hwcap_data { .sigill_fn = smelutv2_sigill, }, { + .name = "SME SBITPERM", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_SBITPERM, + .cpuinfo = "smesbitperm", + .sigill_fn = sme_sbitperm_sigill, + }, + { .name = "SME SF8FMA", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME_SF8FMA, @@ -762,6 +937,27 @@ static const struct hwcap_data { .sigill_fn = smesf8dp4_sigill, }, { + .name = "SME SFEXPA", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_SFEXPA, + .cpuinfo = "smesfexpa", + .sigill_fn = smesfexpa_sigill, + }, + { + .name = "SME SMOP4", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_SMOP4, + .cpuinfo = "smesmop4", + .sigill_fn = smesmop4_sigill, + }, + { + .name = "SME STMOP", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SME_STMOP, + .cpuinfo = "smestmop", + .sigill_fn = smestmop_sigill, + }, + { .name = "SVE", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_SVE, @@ -784,6 +980,13 @@ static const struct hwcap_data { .sigill_fn = sve2p1_sigill, }, { + .name = "SVE 2.2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE2P2, + .cpuinfo = "sve2p2", + .sigill_fn = sve2p2_sigill, + }, + { .name = "SVE AES", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVEAES, @@ -791,6 +994,34 @@ static const struct hwcap_data { .sigill_fn = sveaes_sigill, }, { + .name = "SVE AES2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_AES2, + .cpuinfo = "sveaes2", + .sigill_fn = sveaes2_sigill, + }, + { + .name = "SVE BFSCALE", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_BFSCALE, + .cpuinfo = "svebfscale", + .sigill_fn = svebfscale_sigill, + }, + { + .name = "SVE ELTPERM", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_ELTPERM, + .cpuinfo = "sveeltperm", + .sigill_fn = sveeltperm_sigill, + }, + { + .name = "SVE F16MM", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SVE_F16MM, + .cpuinfo = "svef16mm", + .sigill_fn = svef16mm_sigill, + }, + { .name = "SVE2 B16B16", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVE_B16B16, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S index df3230fdac39..66ab2e0bae5f 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -81,32 +81,31 @@ do_syscall: stp x27, x28, [sp, #96] // Set SVCR if we're doing SME - cbz x1, 1f + cbz x1, load_gpr adrp x2, svcr_in ldr x2, [x2, :lo12:svcr_in] msr S3_3_C4_C2_2, x2 -1: // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, load_gpr mov w12, #0 ldr x2, =za_in -2: _ldr_za 12, 2 +1: _ldr_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, load_gpr adrp x2, zt_in add x2, x2, :lo12:zt_in _ldr_zt 2 -1: +load_gpr: // Load GPRs x8-x28, and save our SP/FP for later comparison ldr x2, =gpr_in add x2, x2, #64 @@ -125,9 +124,9 @@ do_syscall: str x30, [x2], #8 // LR // Load FPRs if we're not doing neither SVE nor streaming SVE - cbnz x0, 1f + cbnz x0, check_sve_in ldr x2, =svcr_in - tbnz x2, #SVCR_SM_SHIFT, 1f + tbnz x2, #SVCR_SM_SHIFT, check_sve_in ldr x2, =fpr_in ldp q0, q1, [x2] @@ -148,8 +147,8 @@ do_syscall: ldp q30, q31, [x2, #16 * 30] b 2f -1: +check_sve_in: // Load the SVE registers if we're doing SVE/SME ldr x2, =z_in @@ -256,32 +255,31 @@ do_syscall: stp q30, q31, [x2, #16 * 30] // Save SVCR if we're doing SME - cbz x1, 1f + cbz x1, check_sve_out mrs x2, S3_3_C4_C2_2 adrp x3, svcr_out str x2, [x3, :lo12:svcr_out] -1: // Save ZA if it's enabled - uses x12 as scratch due to SME STR - tbz x2, #SVCR_ZA_SHIFT, 1f + tbz x2, #SVCR_ZA_SHIFT, check_sve_out mov w12, #0 ldr x2, =za_out -2: _str_za 12, 2 +1: _str_za 12, 2 add x2, x2, x1 add x12, x12, #1 cmp x1, x12 - bne 2b + bne 1b // ZT0 mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1 ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \ #ID_AA64SMFR0_EL1_SMEver_WIDTH - cbz x2, 1f + cbz x2, check_sve_out adrp x2, zt_out add x2, x2, :lo12:zt_out _str_zt 2 -1: +check_sve_out: // Save the SVE state if we have some cbz x0, 1f diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index c2a1842c3d8b..e9c377001f93 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -5,7 +5,6 @@ bpf-syscall* test_verifier test_maps test_lru_map -test_lpm_map test_tag FEATURE-DUMP.libbpf FEATURE-DUMP.selftests diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6ad3b1ba1920..0a016cd71cba 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -83,7 +83,7 @@ CLANG_CPUV4 := 1 endif # Order correspond to 'make run_tests' order -TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ +TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ test_tcpnotify_user test_sysctl \ test_progs-no_alu32 @@ -129,7 +129,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ test_xdp_redirect_multi.sh \ - test_xdp_meta.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c index d98c72dc563e..d32e4edac930 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c @@ -20,10 +20,12 @@ #include <string.h> #include <time.h> #include <unistd.h> +#include <endian.h> #include <arpa/inet.h> #include <sys/time.h> #include <bpf/bpf.h> +#include <test_maps.h> #include "bpf_util.h" @@ -33,6 +35,22 @@ struct tlpm_node { uint8_t key[]; }; +struct lpm_trie_bytes_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned char data[8]; +}; + +struct lpm_trie_int_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned int data; +}; + static struct tlpm_node *tlpm_match(struct tlpm_node *list, const uint8_t *key, size_t n_bits); @@ -223,7 +241,7 @@ static void test_lpm_map(int keysize) n_matches = 0; n_matches_after_delete = 0; n_nodes = 1 << 8; - n_lookups = 1 << 16; + n_lookups = 1 << 9; data = alloca(keysize); memset(data, 0, keysize); @@ -770,16 +788,385 @@ static void test_lpm_multi_thread(void) close(map_fd); } -int main(void) +static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int fd; + + opts.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries, + &opts); + CHECK(fd < 0, "bpf_map_create", "error %d\n", errno); + + return fd; +} + +static void test_lpm_trie_update_flags(void) +{ + struct lpm_trie_int_key key; + unsigned int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* overwrite an empty qp-trie (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add the same node as new node (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "add new elem again", "error %d\n", err); + + /* overwrite the existed node */ + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the node */ + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "update elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite a non-existent node which is the prefix of the first + * node (Error). + */ + key.prefixlen = 8; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + /* add a new node which is the prefix of the first node */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add another new node which will be the sibling of the first node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the third node */ + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* delete the second node to make it an intermediate node */ + key.prefixlen = 8; + key.data = 0; + err = bpf_map_delete_elem(fd, &key); + CHECK(err, "del elem", "error %d\n", err); + + /* overwrite the intermediate node (Error) */ + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + close(fd); +} + +static void test_lpm_trie_update_full_map(void) +{ + struct lpm_trie_int_key key; + int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 8; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* try to add more node (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err); + + /* update the value of an existed node with BPF_EXIST */ + key.prefixlen = 16; + key.data = 0; + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* update the value of an existed node with BPF_ANY */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + close(fd); +} + +static int cmp_str(const void *a, const void *b) +{ + const char *str_a = *(const char **)a, *str_b = *(const char **)b; + + return strcmp(str_a, str_b); +} + +/* Save strings in LPM trie. The trailing '\0' for each string will be + * accounted in the prefixlen. The strings returned during the iteration + * should be sorted as expected. + */ +static void test_lpm_trie_iterate_strs(void) +{ + static const char * const keys[] = { + "ab", "abO", "abc", "abo", "abS", "abcd", + }; + const char *sorted_keys[ARRAY_SIZE(keys)]; + struct lpm_trie_bytes_key key, next_key; + unsigned int value, got, i, j, len; + struct lpm_trie_bytes_key *cur; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys)); + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + unsigned int flags; + + /* add i-th element */ + flags = i % 2 ? BPF_NOEXIST : 0; + len = strlen(keys[i]); + /* include the trailing '\0' */ + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[i], len); + value = i + 100; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "add elem", "#%u error %d\n", i, err); + + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u error %d\n", i, err); + CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got); + + /* re-add i-th element (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err); + + /* Overwrite i-th element */ + flags = i % 2 ? 0 : BPF_EXIST; + value = i; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "update elem", "error %d\n", err); + + /* Lookup #[0~i] elements */ + for (j = 0; j <= i; j++) { + len = strlen(keys[j]); + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[j], len); + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err); + CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n", + i, j, value, got); + } + } + + /* Add element to a full qp-trie (Error) */ + key.prefixlen = sizeof(key.data) * 8; + memset(key.data, 0, sizeof(key.data)); + value = 0; + err = bpf_map_update_elem(fd, &key, &value, 0); + CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err); + + /* Iterate sorted elements: no deletion */ + memcpy(sorted_keys, keys, sizeof(keys)); + qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str); + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + /* Iterate sorted elements: delete the found key after each iteration */ + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + + err = bpf_map_delete_elem(fd, cur); + CHECK(err, "delete", "#%u error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err); + + close(fd); +} + +/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of + * LPM trie will return these integers in big-endian order, therefore, convert + * these integers to big-endian before update. After each iteration, delete the + * found key (the smallest integer) and expect the next iteration will return + * the second smallest number. + */ +static void test_lpm_trie_iterate_ints(void) +{ + struct lpm_trie_int_key key, next_key; + unsigned int i, max_entries; + struct lpm_trie_int_key *cur; + unsigned int *data_set; + int fd, err; + bool value; + + max_entries = 4096; + data_set = calloc(max_entries, sizeof(*data_set)); + CHECK(!data_set, "malloc", "no mem\n"); + for (i = 0; i < max_entries; i++) + data_set[i] = i; + + fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries); + value = true; + for (i = 0; i < max_entries; i++) { + key.prefixlen = 32; + key.data = htobe32(data_set[i]); + + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add elem", "#%u error %d\n", i, err); + } + + cur = NULL; + for (i = 0; i < max_entries; i++) { + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n", + i, next_key.prefixlen); + CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n", + i, be32toh(next_key.data), data_set[i]); + cur = &next_key; + + /* + * Delete the minimal key, the next call of bpf_get_next_key() + * will return the second minimal key. + */ + err = bpf_map_delete_elem(fd, &next_key); + CHECK(err, "del elem", "#%u elem error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + err = bpf_map_get_next_key(fd, NULL, &next_key); + CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err); + + free(data_set); + + close(fd); +} + +void test_lpm_trie_map_basic_ops(void) { int i; /* we want predictable, pseudo random tests */ srand(0xf00ba1); - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - test_lpm_basic(); test_lpm_order(); @@ -792,6 +1179,10 @@ int main(void) test_lpm_get_next_key(); test_lpm_multi_thread(); - printf("test_lpm: OK\n"); - return 0; + test_lpm_trie_update_flags(); + test_lpm_trie_update_full_map(); + test_lpm_trie_iterate_strs(); + test_lpm_trie_iterate_ints(); + + printf("%s: PASS\n", __func__); } diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 62971dbf2996..a4121d2248ac 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -78,8 +78,8 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "open_and_load", "error %d\n", err); /* Only for a fully preemptible kernel */ - if (!skel->kconfig->CONFIG_PREEMPT) { - printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__); + if (!skel->kconfig->CONFIG_PREEMPTION) { + printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__); read_bpf_task_storage_busy__destroy(skel); skips++; return; diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 000000000000..7526de379081 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + */ +void test_changes_pkt_data_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c index 6fa19449297e..43676a9922dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -3,11 +3,14 @@ #include <test_progs.h> #include "raw_tp_null.skel.h" +#include "raw_tp_null_fail.skel.h" void test_raw_tp_null(void) { struct raw_tp_null *skel; + RUN_TESTS(raw_tp_null_fail); + skel = raw_tp_null__open_and_load(); if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h new file mode 100644 index 000000000000..1bdfb79ef009 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h @@ -0,0 +1,394 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SOCKET_HELPERS__ +#define __SOCKET_HELPERS__ + +#include <linux/vm_sockets.h> + +/* include/linux/net.h */ +#define SOCK_TYPE_MASK 0xf + +#define IO_TIMEOUT_SEC 30 +#define MAX_STRERR_LEN 256 + +/* workaround for older vm_sockets.h */ +#ifndef VMADDR_CID_LOCAL +#define VMADDR_CID_LOCAL 1 +#endif + +/* include/linux/cleanup.h */ +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) + +#define take_fd(fd) __get_and_null(fd, -EBADF) + +/* Wrappers that fail the test on error and report it. */ + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + + +#define xaccept_nonblock(fd, addr, len) \ + ({ \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +static inline void close_fd(int *fd) +{ + if (*fd >= 0) + xclose(*fd); +} + +#define __close_fd __attribute__((cleanup(close_fd))) + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static inline void init_addr_loopback4(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static inline void init_addr_loopback6(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + +static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline int enable_reuseport(int s, int progfd) +{ + int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static inline int socket_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len = 0; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + if (sotype & SOCK_DGRAM) + return s; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static inline int socket_loopback(int family, int sotype) +{ + return socket_loopback_reuseport(family, sotype, -1); +} + +static inline int poll_connect(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set wfds; + int r, eval; + socklen_t esize = sizeof(eval); + + FD_ZERO(&wfds); + FD_SET(fd, &wfds); + + r = select(fd + 1, NULL, &wfds, NULL, &timeout); + if (r == 0) + errno = ETIME; + if (r != 1) + return -1; + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) + return -1; + if (eval != 0) { + errno = eval; + return -1; + } + + return 0; +} + +static inline int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static inline int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + + +static inline int create_pair(int family, int sotype, int *p0, int *p1) +{ + __close_fd int s, c = -1, p = -1; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err; + + s = socket_loopback(family, sotype); + if (s < 0) + return s; + + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + c = xsocket(family, sotype, 0); + if (c < 0) + return c; + + err = connect(c, sockaddr(&addr), len); + if (err) { + if (errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + return err; + } + + err = poll_connect(c, IO_TIMEOUT_SEC); + if (err) { + FAIL_ERRNO("poll_connect"); + return err; + } + } + + switch (sotype & SOCK_TYPE_MASK) { + case SOCK_DGRAM: + err = xgetsockname(c, sockaddr(&addr), &len); + if (err) + return err; + + err = xconnect(s, sockaddr(&addr), len); + if (err) + return err; + + *p0 = take_fd(s); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + return p; + + *p0 = take_fd(p); + break; + default: + FAIL("Unsupported socket type %#x", sotype); + return -EOPNOTSUPP; + } + + *p1 = take_fd(c); + return 0; +} + +static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, + int *p0, int *p1) +{ + int err; + + err = create_pair(family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + + return err; +} + +#endif // __SOCKET_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index a2041f8e32eb..884ad87783d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -12,6 +12,7 @@ #include "test_sockmap_progs_query.skel.h" #include "test_sockmap_pass_prog.skel.h" #include "test_sockmap_drop_prog.skel.h" +#include "test_sockmap_change_tail.skel.h" #include "bpf_iter_sockmap.skel.h" #include "sockmap_helpers.h" @@ -108,6 +109,35 @@ out: close(s); } +static void test_sockmap_vsock_delete_on_close(void) +{ + int err, c, p, map; + const int zero = 0; + + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair(AF_VSOCK)")) + return; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_GE(map, 0, "bpf_map_create")) { + close(c); + goto out; + } + + err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); + close(c); + if (!ASSERT_OK(err, "bpf_map_update")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + ASSERT_OK(err, "after close(), bpf_map_update"); + +out: + close(p); + close(map); +} + static void test_skmsg_helpers(enum bpf_map_type map_type) { struct test_skmsg_load_helpers *skel; @@ -614,6 +644,54 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_change_tail(void) +{ + struct test_sockmap_change_tail *skel; + int err, map, verdict; + int c1, p1, sent, recvd; + int zero = 0; + char buf[2]; + + skel = test_sockmap_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + sent = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(sent, 2, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "G", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 2, 0); + ASSERT_EQ(recvd, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + sent = xsend(p1, "E", 1, 0); + ASSERT_EQ(sent, 1, "xsend(p1)"); + recvd = recv(c1, buf, 1, 0); + ASSERT_EQ(recvd, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_change_tail__destroy(skel); +} + static void test_sockmap_skb_verdict_peek_helper(int map) { int err, c1, p1, zero = 0, sent, recvd, avail; @@ -905,8 +983,10 @@ static void test_sockmap_same_sock(void) err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); - if (err) + if (err) { + close(tcp); goto out; + } for (i = 0; i < 2; i++) { err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); @@ -925,24 +1005,70 @@ static void test_sockmap_same_sock(void) ASSERT_OK(err, "bpf_map_update_elem(tcp)"); } + close(tcp); err = bpf_map_delete_elem(map, &zero); - ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + ASSERT_ERR(err, "bpf_map_delete_elem(entry)"); close(stream[0]); close(stream[1]); out: close(dgram); - close(tcp); close(udp); test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_skb_verdict_vsock_poll(void) +{ + struct test_sockmap_pass_prog *skel; + int err, map, conn, peer; + struct bpf_program *prog; + struct bpf_link *link; + char buf = 'x'; + int zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer)) + goto destroy; + + prog = skel->progs.prog_skb_verdict; + map = bpf_map__fd(skel->maps.sock_map_rx); + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + goto close; + + err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto detach; + + if (xsend(peer, &buf, 1, 0) != 1) + goto detach; + + err = poll_read(conn, IO_TIMEOUT_SEC); + if (!ASSERT_OK(err, "poll")) + goto detach; + + if (xrecv_nonblock(conn, &buf, 1, 0) != 1) + FAIL("xrecv_nonblock"); +detach: + bpf_link__detach(link); +close: + xclose(conn); + xclose(peer); +destroy: + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash create_update_free")) test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap vsock delete on close")) + test_sockmap_vsock_delete_on_close(); if (test__start_subtest("sockmap sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) @@ -981,6 +1107,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict change tail")) + test_sockmap_skb_verdict_change_tail(); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); if (test__start_subtest("sockmap skb_verdict msg_f_peek with link")) @@ -997,4 +1125,6 @@ void test_sockmap_basic(void) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link")) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap skb_verdict vsock poll")) + test_sockmap_skb_verdict_vsock_poll(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index 38e35c72bdaa..3e5571dd578d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -1,139 +1,12 @@ #ifndef __SOCKMAP_HELPERS__ #define __SOCKMAP_HELPERS__ -#include <linux/vm_sockets.h> +#include "socket_helpers.h" -/* include/linux/net.h */ -#define SOCK_TYPE_MASK 0xf - -#define IO_TIMEOUT_SEC 30 -#define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 -/* workaround for older vm_sockets.h */ -#ifndef VMADDR_CID_LOCAL -#define VMADDR_CID_LOCAL 1 -#endif - #define __always_unused __attribute__((__unused__)) -/* include/linux/cleanup.h */ -#define __get_and_null(p, nullvalue) \ - ({ \ - __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ - *__ptr = nullvalue; \ - __val; \ - }) - -#define take_fd(fd) __get_and_null(fd, -EBADF) - -#define _FAIL(errnum, fmt...) \ - ({ \ - error_at_line(0, (errnum), __func__, __LINE__, fmt); \ - CHECK_FAIL(true); \ - }) -#define FAIL(fmt...) _FAIL(0, fmt) -#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) -#define FAIL_LIBBPF(err, msg) \ - ({ \ - char __buf[MAX_STRERR_LEN]; \ - libbpf_strerror((err), __buf, sizeof(__buf)); \ - FAIL("%s: %s", (msg), __buf); \ - }) - -/* Wrappers that fail the test on error and report it. */ - -#define xaccept_nonblock(fd, addr, len) \ - ({ \ - int __ret = \ - accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("accept"); \ - __ret; \ - }) - -#define xbind(fd, addr, len) \ - ({ \ - int __ret = bind((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("bind"); \ - __ret; \ - }) - -#define xclose(fd) \ - ({ \ - int __ret = close((fd)); \ - if (__ret == -1) \ - FAIL_ERRNO("close"); \ - __ret; \ - }) - -#define xconnect(fd, addr, len) \ - ({ \ - int __ret = connect((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("connect"); \ - __ret; \ - }) - -#define xgetsockname(fd, addr, len) \ - ({ \ - int __ret = getsockname((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockname"); \ - __ret; \ - }) - -#define xgetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = getsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockopt(" #name ")"); \ - __ret; \ - }) - -#define xlisten(fd, backlog) \ - ({ \ - int __ret = listen((fd), (backlog)); \ - if (__ret == -1) \ - FAIL_ERRNO("listen"); \ - __ret; \ - }) - -#define xsetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = setsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("setsockopt(" #name ")"); \ - __ret; \ - }) - -#define xsend(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = send((fd), (buf), (len), (flags)); \ - if (__ret == -1) \ - FAIL_ERRNO("send"); \ - __ret; \ - }) - -#define xrecv_nonblock(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ - IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("recv"); \ - __ret; \ - }) - -#define xsocket(family, sotype, flags) \ - ({ \ - int __ret = socket(family, sotype, flags); \ - if (__ret == -1) \ - FAIL_ERRNO("socket"); \ - __ret; \ - }) - #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ @@ -193,130 +66,6 @@ __ret; \ }) -static inline void close_fd(int *fd) -{ - if (*fd >= 0) - xclose(*fd); -} - -#define __close_fd __attribute__((cleanup(close_fd))) - -static inline int poll_connect(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set wfds; - int r, eval; - socklen_t esize = sizeof(eval); - - FD_ZERO(&wfds); - FD_SET(fd, &wfds); - - r = select(fd + 1, NULL, &wfds, NULL, &timeout); - if (r == 0) - errno = ETIME; - if (r != 1) - return -1; - - if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) - return -1; - if (eval != 0) { - errno = eval; - return -1; - } - - return 0; -} - -static inline int poll_read(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set rfds; - int r; - - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - - r = select(fd + 1, &rfds, NULL, NULL, &timeout); - if (r == 0) - errno = ETIME; - - return r == 1 ? 0 : -1; -} - -static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return accept(fd, addr, len); -} - -static inline int recv_timeout(int fd, void *buf, size_t len, int flags, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return recv(fd, buf, len, flags); -} - -static inline void init_addr_loopback4(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); - - addr4->sin_family = AF_INET; - addr4->sin_port = 0; - addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - *len = sizeof(*addr4); -} - -static inline void init_addr_loopback6(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); - - addr6->sin6_family = AF_INET6; - addr6->sin6_port = 0; - addr6->sin6_addr = in6addr_loopback; - *len = sizeof(*addr6); -} - -static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, - socklen_t *len) -{ - struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); - - addr->svm_family = AF_VSOCK; - addr->svm_port = VMADDR_PORT_ANY; - addr->svm_cid = VMADDR_CID_LOCAL; - *len = sizeof(*addr); -} - -static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, - socklen_t *len) -{ - switch (family) { - case AF_INET: - init_addr_loopback4(ss, len); - return; - case AF_INET6: - init_addr_loopback6(ss, len); - return; - case AF_VSOCK: - init_addr_loopback_vsock(ss, len); - return; - default: - FAIL("unsupported address family %d", family); - } -} - -static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) -{ - return (struct sockaddr *)ss; -} - static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) { u64 value; @@ -334,136 +83,4 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); } -static inline int enable_reuseport(int s, int progfd) -{ - int err, one = 1; - - err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); - if (err) - return -1; - err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, - sizeof(progfd)); - if (err) - return -1; - - return 0; -} - -static inline int socket_loopback_reuseport(int family, int sotype, int progfd) -{ - struct sockaddr_storage addr; - socklen_t len = 0; - int err, s; - - init_addr_loopback(family, &addr, &len); - - s = xsocket(family, sotype, 0); - if (s == -1) - return -1; - - if (progfd >= 0) - enable_reuseport(s, progfd); - - err = xbind(s, sockaddr(&addr), len); - if (err) - goto close; - - if (sotype & SOCK_DGRAM) - return s; - - err = xlisten(s, SOMAXCONN); - if (err) - goto close; - - return s; -close: - xclose(s); - return -1; -} - -static inline int socket_loopback(int family, int sotype) -{ - return socket_loopback_reuseport(family, sotype, -1); -} - -static inline int create_pair(int family, int sotype, int *p0, int *p1) -{ - __close_fd int s, c = -1, p = -1; - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int err; - - s = socket_loopback(family, sotype); - if (s < 0) - return s; - - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - - c = xsocket(family, sotype, 0); - if (c < 0) - return c; - - err = connect(c, sockaddr(&addr), len); - if (err) { - if (errno != EINPROGRESS) { - FAIL_ERRNO("connect"); - return err; - } - - err = poll_connect(c, IO_TIMEOUT_SEC); - if (err) { - FAIL_ERRNO("poll_connect"); - return err; - } - } - - switch (sotype & SOCK_TYPE_MASK) { - case SOCK_DGRAM: - err = xgetsockname(c, sockaddr(&addr), &len); - if (err) - return err; - - err = xconnect(s, sockaddr(&addr), len); - if (err) - return err; - - *p0 = take_fd(s); - break; - case SOCK_STREAM: - case SOCK_SEQPACKET: - p = xaccept_nonblock(s, NULL, NULL); - if (p < 0) - return p; - - *p0 = take_fd(p); - break; - default: - FAIL("Unsupported socket type %#x", sotype); - return -EOPNOTSUPP; - } - - *p1 = take_fd(c); - return 0; -} - -static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, - int *p0, int *p1) -{ - int err; - - err = create_pair(family, sotype, c0, p0); - if (err) - return err; - - err = create_pair(family, sotype, c1, p1); - if (err) { - close(*c0); - close(*p0); - } - - return err; -} - #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 60f474d965a9..42e822ea352f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -197,7 +197,7 @@ static void test_nodeadlock(void) /* Unnecessary recursion and deadlock detection are reproducible * in the preemptible kernel. */ - if (!skel->kconfig->CONFIG_PREEMPT) { + if (!skel->kconfig->CONFIG_PREEMPTION) { test__skip(); goto done; } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c new file mode 100644 index 000000000000..74752233e779 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <test_progs.h> +#include <linux/pkt_cls.h> + +#include "test_tc_change_tail.skel.h" +#include "socket_helpers.h" + +#define LO_IFINDEX 1 + +void test_tc_change_tail(void) +{ + LIBBPF_OPTS(bpf_tcx_opts, tcx_opts); + struct test_tc_change_tail *skel = NULL; + struct bpf_link *link; + int c1, p1; + char buf[2]; + int ret; + + skel = test_tc_change_tail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load")) + return; + + link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX, + &tcx_opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx")) + goto destroy; + + skel->links.change_tail = link; + ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1); + if (!ASSERT_OK(ret, "create_pair")) + goto destroy; + + ret = xsend(p1, "Tr", 2, 0); + ASSERT_EQ(ret, 2, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 2, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "G", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 2, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret"); + + ret = xsend(p1, "E", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + ret = xsend(p1, "Z", 1, 0); + ASSERT_EQ(ret, 1, "xsend(p1)"); + ret = recv(c1, buf, 1, 0); + ASSERT_EQ(ret, 1, "recv(c1)"); + ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret"); + + close(c1); + close(p1); +destroy: + test_tc_change_tail__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index 151a4210028f..2461d183dee5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -14,10 +14,16 @@ #include "netlink_helpers.h" #include "tc_helpers.h" +#define NETKIT_HEADROOM 32 +#define NETKIT_TAILROOM 8 + #define MARK 42 #define PRIO 0xeb9f #define ICMP_ECHO 8 +#define FLAG_ADJUST_ROOM (1 << 0) +#define FLAG_SAME_NETNS (1 << 1) + struct icmphdr { __u8 type; __u8 code; @@ -35,7 +41,7 @@ struct iplink_req { }; static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, - bool same_netns, int scrub, int peer_scrub) + int scrub, int peer_scrub, __u32 flags) { struct rtnl_handle rth = { .fd = -1 }; struct iplink_req req = {}; @@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + if (flags & FLAG_ADJUST_ROOM) { + addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM); + addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM); + } addattr_nest_end(&req.n, data); addattr_nest_end(&req.n, linkinfo); @@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, " addr ee:ff:bb:cc:aa:dd"), "set hwaddress"); } - if (same_netns) { + if (flags & FLAG_SAME_NETNS) { ASSERT_OK(system("ip link set dev " netkit_peer " up"), "up peer"); ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"), @@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void) int err, ifindex, ifindex2; err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode) struct bpf_link *link; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void) serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); } -static void serial_test_tc_netkit_scrub_type(int scrub) +static void serial_test_tc_netkit_scrub_type(int scrub, bool room) { LIBBPF_OPTS(bpf_netkit_opts, optl); struct test_tc_link *skel; @@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, scrub, scrub); + &ifindex, scrub, scrub, + room ? FLAG_ADJUST_ROOM : 0); if (err) return; @@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); + ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom"); + ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom"); cleanup: test_tc_link__destroy(skel); @@ -852,6 +865,6 @@ cleanup: void serial_test_tc_netkit_scrub(void) { - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index d9f65adb456b..3ee40ee9413a 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -225,24 +225,7 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } - -void test_verifier_mtu(void) -{ - __u64 caps = 0; - int ret; - - /* In case CAP_BPF and CAP_PERFMON is not set */ - ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); - if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) - return; - ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); - if (!ASSERT_OK(ret, "disable_cap_sys_admin")) - goto restore_cap; - RUN(verifier_mtu); -restore_cap: - if (caps) - cap_enable_effective(caps, NULL); -} +void test_verifier_mtu(void) { RUN(verifier_mtu); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index e6a783c7f5db..937da9b7532a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -2,6 +2,14 @@ #include <test_progs.h> #include <network_helpers.h> #include "test_xdp_context_test_run.skel.h" +#include "test_xdp_meta.skel.h" + +#define TX_ADDR "10.0.0.1" +#define RX_ADDR "10.0.0.2" +#define RX_NAME "veth0" +#define TX_NAME "veth1" +#define TX_NETNS "xdp_context_tx" +#define RX_NETNS "xdp_context_rx" void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -103,3 +111,82 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } + +void test_xdp_context_functional(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *rx_ns = NULL, *tx_ns = NULL; + struct bpf_program *tc_prog, *xdp_prog; + struct test_xdp_meta *skel = NULL; + struct nstoken *nstoken = NULL; + int rx_ifindex; + int ret; + + tx_ns = netns_new(TX_NETNS, false); + if (!ASSERT_OK_PTR(tx_ns, "create tx_ns")) + return; + + rx_ns = netns_new(RX_NETNS, false); + if (!ASSERT_OK_PTR(rx_ns, "create rx_ns")) + goto close; + + SYS(close, "ip link add " RX_NAME " netns " RX_NETNS + " type veth peer name " TX_NAME " netns " TX_NETNS); + + nstoken = open_netns(RX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) + goto close; + + SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); + SYS(close, "ip link set dev " RX_NAME " up"); + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + rx_ifindex = if_nametoindex(RX_NAME); + if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx")) + goto close; + + tc_hook.ifindex = rx_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls"); + if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp"); + if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog")) + goto close; + + ret = bpf_xdp_attach(rx_ifindex, + bpf_program__fd(xdp_prog), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + close_netns(nstoken); + + nstoken = open_netns(TX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) + goto close; + + SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); + SYS(close, "ip link set dev " TX_NAME " up"); + ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + +close: + close_netns(nstoken); + test_xdp_meta__destroy(skel); + netns_free(rx_ns); + netns_free(tx_ns); +} + diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index eccaf955e394..f45f4352feeb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -5,6 +5,10 @@ #define XSTR(s) STR(s) #define STR(s) #s +/* Expand a macro and then stringize the expansion */ +#define QUOTE(str) #str +#define EXPAND_QUOTE(str) QUOTE(str) + /* This set of attributes controls behavior of the * test_loader.c:test_loader__run_subtests(). * @@ -106,6 +110,7 @@ * __arch_* Specify on which architecture the test case should be tested. * Several __arch_* annotations could be specified at once. * When test case is not run on current arch it is marked as skipped. + * __caps_unpriv Specify the capabilities that should be set when running the test. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) #define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg))) @@ -129,6 +134,13 @@ #define __arch_x86_64 __arch("X86_64") #define __arch_arm64 __arch("ARM64") #define __arch_riscv64 __arch("RISCV64") +#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps)))) + +/* Define common capabilities tested using __caps_unpriv */ +#define CAP_NET_ADMIN 12 +#define CAP_SYS_ADMIN 21 +#define CAP_PERFMON 38 +#define CAP_BPF 39 /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 000000000000..43cada48b28a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 000000000000..f9a622705f1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 8f36c9de7591..dfd817d0348c 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) /* A dynptr can't be used after it has been invalidated */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -428,7 +428,7 @@ int invalid_helper2(void *ctx) /* A bpf_dynptr is invalidated if it's been written into */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -1407,7 +1407,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) /* bpf_dynptr_adjust can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_adjust_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1420,7 +1420,7 @@ int dynptr_adjust_invalid(void *ctx) /* bpf_dynptr_is_null can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_is_null_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1433,7 +1433,7 @@ int dynptr_is_null_invalid(void *ctx) /* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_is_rdonly_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1446,7 +1446,7 @@ int dynptr_is_rdonly_invalid(void *ctx) /* bpf_dynptr_size can only be called on initialized dynptrs */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int dynptr_size_invalid(void *ctx) { struct bpf_dynptr ptr = {}; @@ -1459,7 +1459,7 @@ int dynptr_size_invalid(void *ctx) /* Only initialized dynptrs can be cloned */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #1") +__failure __msg("Expected an initialized dynptr as arg #0") int clone_invalid1(void *ctx) { struct bpf_dynptr ptr1 = {}; @@ -1493,7 +1493,7 @@ int clone_invalid2(struct xdp_md *xdp) /* Invalidating a dynptr should invalidate its clones */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate1(void *ctx) { struct bpf_dynptr clone; @@ -1514,7 +1514,7 @@ int clone_invalidate1(void *ctx) /* Invalidating a dynptr should invalidate its parent */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate2(void *ctx) { struct bpf_dynptr ptr; @@ -1535,7 +1535,7 @@ int clone_invalidate2(void *ctx) /* Invalidating a dynptr should invalidate its siblings */ SEC("?raw_tp") -__failure __msg("Expected an initialized dynptr as arg #3") +__failure __msg("Expected an initialized dynptr as arg #2") int clone_invalidate3(void *ctx) { struct bpf_dynptr ptr; @@ -1723,7 +1723,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr) } SEC("?raw_tp") -__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") int test_dynptr_reg_type(void *ctx) { struct task_struct *current = NULL; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index ef70b88bccb2..7c969c127573 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1486,4 +1486,30 @@ int iter_subprog_check_stacksafe(const void *ctx) return 0; } +struct bpf_iter_num global_it; + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_new_bad_arg(const void *ctx) +{ + bpf_iter_num_new(&global_it, 0, 1); + return 0; +} + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_next_bad_arg(const void *ctx) +{ + bpf_iter_num_next(&global_it); + return 0; +} + +SEC("raw_tp") +__failure __msg("arg#0 expected pointer to an iterator on stack") +int iter_destroy_bad_arg(const void *ctx) +{ + bpf_iter_num_destroy(&global_it); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c index d47e59aba6de..f41257eadbb2 100644 --- a/tools/testing/selftests/bpf/progs/iters_state_safety.c +++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c @@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int destroy_without_creating_fail(void *ctx) { /* init with zeros to stop verifier complaining about uninit stack */ @@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int compromise_iter_w_direct_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int compromise_iter_w_helper_write_fail(void *ctx) { struct bpf_iter_num iter; @@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx) } SEC("?raw_tp") -__failure __msg("expected uninitialized iter_num as arg #1") +__failure __msg("expected uninitialized iter_num as arg #0") int double_create_fail(void *ctx) { struct bpf_iter_num iter; @@ -258,7 +258,7 @@ int double_create_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int double_destroy_fail(void *ctx) { struct bpf_iter_num iter; @@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int next_without_new_fail(void *ctx) { struct bpf_iter_num iter; @@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx) } SEC("?raw_tp") -__failure __msg("expected an initialized iter_num as arg #1") +__failure __msg("expected an initialized iter_num as arg #0") int next_after_destroy_fail(void *ctx) { struct bpf_iter_num iter; diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c index 4a176e6aede8..6543d5b6e0a9 100644 --- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c +++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c @@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #2") +__msg("expected an initialized iter_testmod_seq as arg #1") int testmod_seq_getter_before_bad(const void *ctx) { struct bpf_iter_testmod_seq it; @@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx) SEC("?raw_tp") __failure -__msg("expected an initialized iter_testmod_seq as arg #2") +__msg("expected an initialized iter_testmod_seq as arg #1") int testmod_seq_getter_after_bad(const void *ctx) { struct bpf_iter_testmod_seq it; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c index 457f34c151e3..5927054b6dd9 100644 --- a/tools/testing/selftests/bpf/progs/raw_tp_null.c +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -3,6 +3,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; @@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) if (task->pid != tid) return 0; - i = i + skb->mark + 1; - /* The compiler may move the NULL check before this deref, which causes - * the load to fail as deref of scalar. Prevent that by using a barrier. + /* If dead code elimination kicks in, the increment +=2 will be + * removed. For raw_tp programs attaching to tracepoints in kernel + * modules, we mark input arguments as PTR_MAYBE_NULL, so branch + * prediction should never kick in. */ - barrier(); - /* If dead code elimination kicks in, the increment below will - * be removed. For raw_tp programs, we mark input arguments as - * PTR_MAYBE_NULL, so branch prediction should never kick in. - */ - if (!skb) - i += 2; + asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" + : [i]"+r"(i) + : [ctx]"r"(skb) + : "memory"); return 0; } diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c new file mode 100644 index 000000000000..38d669957bf1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +/* Ensure module parameter has PTR_MAYBE_NULL */ +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} + +/* Check NULL marking */ +SEC("tp_btf/sched_pi_setprio") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) { + asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c index 76556e0b42b2..69da05bb6c63 100644 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -extern bool CONFIG_PREEMPT __kconfig __weak; +extern bool CONFIG_PREEMPTION __kconfig __weak; extern const int bpf_task_storage_busy __ksym; char _license[] SEC("license") = "GPL"; @@ -24,7 +24,7 @@ int BPF_PROG(read_bpf_task_storage_busy) { int *value; - if (!CONFIG_PREEMPT) + if (!CONFIG_PREEMPTION) return 0; if (bpf_get_current_pid_tgid() >> 32 != pid) diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c index ea2dbb80f7b3..986829aaf73a 100644 --- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; #define EBUSY 16 #endif -extern bool CONFIG_PREEMPT __kconfig __weak; +extern bool CONFIG_PREEMPTION __kconfig __weak; int nr_get_errs = 0; int nr_del_errs = 0; @@ -29,7 +29,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int ret, zero = 0; int *value; - if (!CONFIG_PREEMPT) + if (!CONFIG_PREEMPTION) return 0; task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index d1a57f7d09bd..fe6249d99b31 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -11,6 +11,8 @@ int subprog_tc(struct __sk_buff *skb) __sink(skb); __sink(ret); + /* let verifier know that 'subprog_tc' can change pointers to skb->data */ + bpf_skb_change_proto(skb, 0, 0); return ret; } diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index e68667aec6a6..cd4d752bd089 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) } SEC("?lsm.s/bpf") -__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val = 0; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c new file mode 100644 index 000000000000..2796dd8545eb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 ByteDance */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +long change_tail_ret = 1; + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + char *data, *data_end; + + bpf_skb_pull_data(skb, 1); + data = (char *)(unsigned long)skb->data; + data_end = (char *)(unsigned long)skb->data_end; + + if (data + 1 > data_end) + return SK_PASS; + + if (data[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0); + return SK_PASS; + } else if (data[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); + return SK_PASS; + } else if (data[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return SK_PASS; + } + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c new file mode 100644 index 000000000000..28edafe803f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/pkt_cls.h> + +long change_tail_ret = 1; + +static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = data; + struct iphdr *iph; + + /* Verify Ethernet header */ + if ((void *)(data + sizeof(*eth)) > data_end) + return NULL; + + /* Skip Ethernet header to get to IP header */ + iph = (void *)(data + sizeof(struct ethhdr)); + + /* Verify IP header */ + if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end) + return NULL; + + /* Basic IP header validation */ + if (iph->version != 4) /* Only support IPv4 */ + return NULL; + + if (iph->ihl < 5) /* Minimum IP header length */ + return NULL; + + *ip_proto = iph->protocol; + return iph; +} + +static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph) +{ + void *data_end = (void *)(long)skb->data_end; + void *hdr = (void *)iph; + struct udphdr *udp; + + /* Calculate UDP header position */ + udp = hdr + (iph->ihl * 4); + hdr = (void *)udp; + + /* Verify UDP header bounds */ + if ((void *)(hdr + sizeof(*udp)) > data_end) + return NULL; + + return udp; +} + +SEC("tc/ingress") +int change_tail(struct __sk_buff *skb) +{ + int len = skb->len; + struct udphdr *udp; + struct iphdr *iph; + void *data_end; + char *payload; + int ip_proto; + + bpf_skb_pull_data(skb, len); + + data_end = (void *)(long)skb->data_end; + iph = parse_ip_header(skb, &ip_proto); + if (!iph) + return TCX_PASS; + + if (ip_proto != IPPROTO_UDP) + return TCX_PASS; + + udp = parse_udp_header(skb, iph); + if (!udp) + return TCX_PASS; + + payload = (char *)udp + (sizeof(struct udphdr)); + if (payload + 1 > (char *)data_end) + return TCX_PASS; + + if (payload[0] == 'T') { /* Trim the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'G') { /* Grow the packet */ + change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0); + if (!change_tail_ret) + bpf_skb_change_tail(skb, len, 0); + return TCX_PASS; + } else if (payload[0] == 'E') { /* Error */ + change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + return TCX_PASS; + } else if (payload[0] == 'Z') { /* Zero */ + change_tail_ret = bpf_skb_change_tail(skb, 0, 0); + return TCX_PASS; + } + return TCX_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 10d825928499..630f12e51b07 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -8,6 +8,7 @@ #include <linux/if_packet.h> #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char LICENSE[] SEC("license") = "GPL"; @@ -27,6 +28,7 @@ bool seen_host; bool seen_mcast; int mark, prio; +unsigned short headroom, tailroom; SEC("tc/ingress") int tc1(struct __sk_buff *skb) @@ -104,11 +106,24 @@ out: return TCX_PASS; } +struct sk_buff { + struct net_device *dev; +}; + +struct net_device { + unsigned short needed_headroom; + unsigned short needed_tailroom; +}; + SEC("tc/egress") int tc8(struct __sk_buff *skb) { + struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev); + seen_tc8 = true; mark = skb->mark; prio = skb->priority; + headroom = BPF_CORE_READ(dev, needed_headroom); + tailroom = BPF_CORE_READ(dev, needed_tailroom); return TCX_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index 5aaf2b065f86..bba3e37f749b 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -7,11 +7,7 @@ #include "bpf_misc.h" SEC("tp_btf/bpf_testmod_test_nullable_bare") -/* This used to be a failure test, but raw_tp nullable arguments can now - * directly be dereferenced, whether they have nullable annotation or not, - * and don't need to be explicitly checked. - */ -__success +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index a7c4a7d49fe6..fe2d71ae0e71 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -8,7 +8,7 @@ #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -SEC("t") +SEC("tc") int ing_cls(struct __sk_buff *ctx) { __u8 *data, *data_meta, *data_end; @@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx) return diff ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("x") +SEC("xdp") int ing_xdp(struct xdp_md *ctx) { __u8 *data, *data_meta, *data_end; diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c index 7c881bca9af5..8bcddadfc4da 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -32,18 +32,18 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) SEC("iter/cgroup") __description("uninitialized iter in ->next()") -__failure __msg("expected an initialized iter_bits as arg #1") +__failure __msg("expected an initialized iter_bits as arg #0") int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { - struct bpf_iter_bits *it = NULL; + struct bpf_iter_bits it = {}; - bpf_iter_bits_next(it); + bpf_iter_bits_next(&it); return 0; } SEC("iter/cgroup") __description("uninitialized iter in ->destroy()") -__failure __msg("expected an initialized iter_bits as arg #1") +__failure __msg("expected an initialized iter_bits as arg #0") int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) { struct bpf_iter_bits it = {}; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c index a570e48b917a..28b939572cda 100644 --- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void btf_ctx_access_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\ + r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\ r0 = 0; \ exit; \ " ::: __clobber_all); @@ -23,7 +23,43 @@ __success __retval(0) __naked void ctx_access_u32_pointer_accept(void) { asm volatile (" \ - r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r2 = *(u64 *)(r1 + 0); /* load 1nd argument value (u32 pointer) */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u32") +__failure __msg("size 4 must be 8") +__naked void ctx_access_u32_pointer_reject_32(void) +{ + asm volatile (" \ + r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u16") +__failure __msg("size 2 must be 8") +__naked void ctx_access_u32_pointer_reject_16(void) +{ + asm volatile (" \ + r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("fentry/bpf_fentry_test9") +__description("btf_ctx_access u32 pointer reject u8") +__failure __msg("size 1 must be 8") +__naked void ctx_access_u32_pointer_reject_8(void) +{ + asm volatile (" \ + r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\ r0 = 0; \ exit; \ " ::: __clobber_all); diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c index ec79cbcfde91..87e51a215558 100644 --- a/tools/testing/selftests/bpf/progs/verifier_d_path.c +++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c @@ -11,7 +11,7 @@ __success __retval(0) __naked void d_path_accept(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ @@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe") __naked void d_path_reject(void) { asm volatile (" \ - r1 = *(u32*)(r1 + 0); \ + r1 = *(u64 *)(r1 + 0); \ r2 = r10; \ r2 += -8; \ r6 = 0; \ diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c index 70c7600a26a0..4ccf1ebc42d1 100644 --- a/tools/testing/selftests/bpf/progs/verifier_mtu.c +++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c @@ -6,7 +6,9 @@ SEC("tc/ingress") __description("uninit/mtu: write rejected") -__failure __msg("invalid indirect read from stack") +__success +__caps_unpriv(CAP_BPF|CAP_NET_ADMIN) +__failure_unpriv __msg_unpriv("invalid indirect read from stack") int tc_uninit_mtu(struct __sk_buff *ctx) { __u32 mtu; diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index d3e70e38e442..0d5e56dffabb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -1037,4 +1044,53 @@ __naked void sock_create_read_src_port(void) : __clobber_all); } +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 671d9f415dbf..1e5a511e8494 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -1244,4 +1244,39 @@ __naked void old_stack_misc_vs_cur_ctx_ptr(void) : __clobber_all); } +SEC("socket") +__description("stack_noperfmon: reject read of invalid slots") +__success +__caps_unpriv(CAP_BPF) +__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8") +__naked void stack_noperfmon_reject_invalid_read(void) +{ + asm volatile (" \ + r2 = 1; \ + r6 = r10; \ + r6 += -8; \ + *(u8 *)(r6 + 0) = r2; \ + r2 = *(u64 *)(r6 + 0); \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots") +__success +__caps_unpriv(CAP_BPF) +__success_unpriv +__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void) +{ + asm volatile(" \ + r0 = 0; \ + *(u64 *)(r10 - 8) = r0; \ + *(u32 *)(r10 - 8) = r0; \ + exit; \ +" : + : + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h index ca0162b4dc57..1fcfa5160231 100644 --- a/tools/testing/selftests/bpf/sdt.h +++ b/tools/testing/selftests/bpf/sdt.h @@ -102,6 +102,8 @@ # define STAP_SDT_ARG_CONSTRAINT nZr # elif defined __arm__ # define STAP_SDT_ARG_CONSTRAINT g +# elif defined __loongarch__ +# define STAP_SDT_ARG_CONSTRAINT nmr # else # define STAP_SDT_ARG_CONSTRAINT nor # endif diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 3e9b009580d4..53b06647cf57 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -36,6 +36,7 @@ #define TEST_TAG_ARCH "comment:test_arch=" #define TEST_TAG_JITED_PFX "comment:test_jited=" #define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv=" +#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -74,6 +75,7 @@ struct test_subspec { struct expected_msgs jited; int retval; bool execute; + __u64 caps; }; struct test_spec { @@ -276,6 +278,37 @@ static int parse_int(const char *str, int *val, const char *name) return 0; } +static int parse_caps(const char *str, __u64 *val, const char *name) +{ + int cap_flag = 0; + char *token = NULL, *saveptr = NULL; + + char *str_cpy = strdup(str); + if (str_cpy == NULL) { + PRINT_FAIL("Memory allocation failed\n"); + return -EINVAL; + } + + token = strtok_r(str_cpy, "|", &saveptr); + while (token != NULL) { + errno = 0; + if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) { + PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token); + return -EINVAL; + } + cap_flag = strtol(token, NULL, 10); + if (!cap_flag || errno) { + PRINT_FAIL("failed to parse caps %s\n", name); + return -EINVAL; + } + *val |= (1ULL << cap_flag); + token = strtok_r(NULL, "|", &saveptr); + } + + free(str_cpy); + return 0; +} + static int parse_retval(const char *str, int *val, const char *name) { struct { @@ -541,6 +574,12 @@ static int parse_test_spec(struct test_loader *tester, jit_on_next_line = true; } else if (str_has_pfx(s, TEST_BTF_PATH)) { spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; + } else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) { + val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1; + err = parse_caps(val, &spec->unpriv.caps, "test caps"); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; } } @@ -917,6 +956,13 @@ void run_subtest(struct test_loader *tester, test__end_subtest(); return; } + if (subspec->caps) { + err = cap_enable_effective(subspec->caps, NULL); + if (err) { + PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(err)); + goto subtest_cleanup; + } + } } /* Implicitly reset to NULL if next test case doesn't specify */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index e5c7ecbe57e3..fd2da2234cc9 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1579,8 +1579,12 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt) static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt) { - txmsg_redir = 1; opt->tx_wait_mem = true; + txmsg_redir = 1; + test_send_large(opt, cgrp); + + txmsg_redir = 1; + txmsg_apply = 4097; test_send_large(opt, cgrp); opt->tx_wait_mem = false; } diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh deleted file mode 100755 index 2740322c1878..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -BPF_FILE="test_xdp_meta.bpf.o" -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" - -cleanup() -{ - if [ "$?" = "0" ]; then - echo "selftests: test_xdp_meta [PASS]"; - else - echo "selftests: test_xdp_meta [FAILED]"; - fi - - set +e - ip link del veth1 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -} - -ip link set dev lo xdp off 2>/dev/null > /dev/null -if [ $? -ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdp support" - exit $KSFT_SKIP -fi -set -e - -ip netns add ${NS1} -ip netns add ${NS2} - -trap cleanup 0 2 3 6 9 - -ip link add veth1 type veth peer name veth2 - -ip link set veth1 netns ${NS1} -ip link set veth2 netns ${NS2} - -ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1 -ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 - -ip netns exec ${NS1} tc qdisc add dev veth1 clsact -ip netns exec ${NS2} tc qdisc add dev veth2 clsact - -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t - -ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x - -ip netns exec ${NS1} ip link set dev veth1 up -ip netns exec ${NS2} ip link set dev veth2 up - -ip netns exec ${NS1} ping -c 1 10.1.1.22 -ip netns exec ${NS2} ping -c 1 10.1.1.11 - -exit 0 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 2d742fdac6b9..81943c6254e6 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -293,6 +293,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st return 0; } #else +# ifndef PROCMAP_QUERY_VMA_EXECUTABLE +# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04 +# endif + static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) { return -EOPNOTSUPP; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f9956eed797..e38675d9b118 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XSK_UMEM__DEFAULT_FLAGS, + .flags = XDP_UMEM_TX_METADATA_LEN, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx = 0; @@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname) { struct hwtstamp_config cfg = { .rx_filter = HWTSTAMP_FILTER_ALL, + .tx_type = HWTSTAMP_TX_ON, }; hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 03c1bdaed2c3..400a696a0d21 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus # # If isolated CPUs have been reserved at boot time (as shown in -# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7 +# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These isolated CPUs will also be removed -# before being compared with the expected results. +# the tests may fail incorrectly. These pre-isolated CPUs should stay in +# an isolated state throughout the testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) if [[ -n "$BOOT_ISOLCPUS" ]] then - [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] && + [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi @@ -684,14 +684,18 @@ check_isolcpus() fi # + # Appending pre-isolated CPUs + # Even though CPU #8 isn't used for testing, it can't be pre-isolated + # to make appending those CPUs easier. + # + [[ -n "$BOOT_ISOLCPUS" ]] && { + EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} + EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + } + + # # Check cpuset.cpus.isolated cpumask # - if [[ -z "$BOOT_ISOLCPUS" ]] - then - ISOLCPUS=$(cat $ISCPUS) - else - ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") - fi [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 @@ -731,8 +735,6 @@ check_isolcpus() fi done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ -n "BOOT_ISOLCPUS" ]] && - ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] } @@ -836,8 +838,11 @@ run_state_test() # if available [[ -n "$ICPUS" ]] && { check_isolcpus $ICPUS - [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} + test_fail $I "isolated CPU" \ + "Expect $ICPUS, get $ISOLCPUS instead" + } } reset_cgroup_states # diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile new file mode 100644 index 000000000000..ed210037b29d --- /dev/null +++ b/tools/testing/selftests/coredump/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +CFLAGS = $(KHDR_INCLUDES) + +TEST_GEN_PROGS := stackdump_test +TEST_FILES := stackdump + +include ../lib.mk diff --git a/tools/testing/selftests/coredump/README.rst b/tools/testing/selftests/coredump/README.rst new file mode 100644 index 000000000000..164a7aa181c8 --- /dev/null +++ b/tools/testing/selftests/coredump/README.rst @@ -0,0 +1,50 @@ +coredump selftest +================= + +Background context +------------------ + +`coredump` is a feature which dumps a process's memory space when the process terminates +unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default, +`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a +different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a +user-space program by writing the pipe symbol (`|`) followed by the command to be executed to +`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`. + +The piped user program may be interested in reading the stack pointers of the crashed process. The +crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in +`/proc/$PID/stat`. See `man 5 proc` for all the details. + +The problem +----------- +While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field +reads zero. But when the thread is dead (e.g. during a coredump), this field should have valid +value. + +However, this was broken in the past and `kstkesp` was zero even during coredump: + +* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to + always be zero + +* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the + coredumping thread. However, other threads in a coredumping process still had the problem. + +* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed + for all threads in a coredumping process. + +* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again + for the other threads in a coredumping process. + +The problem has been fixed now, but considering the history, it may appear again in the future. + +The goal of this test +--------------------- +This test detects problem with reading `kstkesp` during coredump by doing the following: + +#. Tell the kernel to execute the "stackdump" script when a coredump happens. This script + reads the stack pointers of all threads of crashed processes. + +#. Spawn a child process who creates some threads and then crashes. + +#. Read the output from the "stackdump" script, and make sure all stack pointer values are + non-zero. diff --git a/tools/testing/selftests/coredump/stackdump b/tools/testing/selftests/coredump/stackdump new file mode 100755 index 000000000000..96714ce42d12 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump @@ -0,0 +1,14 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +CRASH_PROGRAM_ID=$1 +STACKDUMP_FILE=$2 + +TMP=$(mktemp) + +for t in /proc/$CRASH_PROGRAM_ID/task/*; do + tid=$(basename $t) + cat /proc/$tid/stat | awk '{print $29}' >> $TMP +done + +mv $TMP $STACKDUMP_FILE diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c new file mode 100644 index 000000000000..137b2364a082 --- /dev/null +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <fcntl.h> +#include <libgen.h> +#include <linux/limits.h> +#include <pthread.h> +#include <string.h> +#include <sys/resource.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define STACKDUMP_FILE "stack_values" +#define STACKDUMP_SCRIPT "stackdump" +#define NUM_THREAD_SPAWN 128 + +static void *do_nothing(void *) +{ + while (1) + pause(); +} + +static void crashing_child(void) +{ + pthread_t thread; + int i; + + for (i = 0; i < NUM_THREAD_SPAWN; ++i) + pthread_create(&thread, NULL, do_nothing, NULL); + + /* crash on purpose */ + i = *(int *)NULL; +} + +FIXTURE(coredump) +{ + char original_core_pattern[256]; +}; + +FIXTURE_SETUP(coredump) +{ + char buf[PATH_MAX]; + FILE *file; + char *dir; + int ret; + + file = fopen("/proc/sys/kernel/core_pattern", "r"); + ASSERT_NE(NULL, file); + + ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file); + ASSERT_TRUE(ret || feof(file)); + ASSERT_LT(ret, sizeof(self->original_core_pattern)); + + self->original_core_pattern[ret] = '\0'; + + ret = fclose(file); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(coredump) +{ + const char *reason; + FILE *file; + int ret; + + unlink(STACKDUMP_FILE); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + if (!file) { + reason = "Unable to open core_pattern"; + goto fail; + } + + ret = fprintf(file, "%s", self->original_core_pattern); + if (ret < 0) { + reason = "Unable to write to core_pattern"; + goto fail; + } + + ret = fclose(file); + if (ret) { + reason = "Unable to close core_pattern"; + goto fail; + } + + return; +fail: + /* This should never happen */ + fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); +} + +TEST_F(coredump, stackdump) +{ + struct sigaction action = {}; + unsigned long long stack; + char *test_dir, *line; + size_t line_length; + char buf[PATH_MAX]; + int ret, i; + FILE *file; + pid_t pid; + + /* + * Step 1: Setup core_pattern so that the stackdump script is executed when the child + * process crashes + */ + ret = readlink("/proc/self/exe", buf, sizeof(buf)); + ASSERT_NE(-1, ret); + ASSERT_LT(ret, sizeof(buf)); + buf[ret] = '\0'; + + test_dir = dirname(buf); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(NULL, file); + + ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE); + ASSERT_LT(0, ret); + + ret = fclose(file); + ASSERT_EQ(0, ret); + + /* Step 2: Create a process who spawns some threads then crashes */ + pid = fork(); + ASSERT_TRUE(pid >= 0); + if (pid == 0) + crashing_child(); + + /* + * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file + */ + for (i = 0; i < 10; ++i) { + file = fopen(STACKDUMP_FILE, "r"); + if (file) + break; + sleep(1); + } + ASSERT_NE(file, NULL); + + /* Step 4: Make sure all stack pointer values are non-zero */ + for (i = 0; -1 != getline(&line, &line_length, file); ++i) { + stack = strtoull(line, NULL, 10); + ASSERT_NE(stack, 0); + } + + ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); + + fclose(file); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b2a6a5dd1af..812f656260fb 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -6,7 +6,7 @@ TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race TEST_GEN_FILES += debugfs_target_ids_pid_leak TEST_GEN_FILES += access_memory access_memory_even -TEST_FILES = _chk_dependency.sh _debugfs_common.sh +TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py # functionality tests TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh index aa995516870b..54d45791b0d9 100644 --- a/tools/testing/selftests/damon/_debugfs_common.sh +++ b/tools/testing/selftests/damon/_debugfs_common.sh @@ -8,7 +8,12 @@ test_write_result() { expect_reason=$4 expected=$5 - echo "$content" > "$file" + if [ "$expected" = "0" ] + then + echo "$content" > "$file" + else + echo "$content" > "$file" 2> /dev/null + fi if [ $? -ne "$expected" ] then echo "writing $content to $file doesn't return $expected" diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c index 3be121487432..a9f4e9aaf3a9 100644 --- a/tools/testing/selftests/damon/access_memory_even.c +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -14,10 +14,8 @@ int main(int argc, char *argv[]) { char **regions; - clock_t start_clock; int nr_regions; int sz_region; - int access_time_ms; int i; if (argc != 3) { diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh index 4a76e37ef16b..bd6c22d96ead 100755 --- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh +++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh @@ -12,7 +12,7 @@ then exit 1 fi -if echo foo > "$DBGFS/mk_contexts" +if echo foo > "$DBGFS/mk_contexts" 2> /dev/null then echo "duplicate context creation success" exit 1 diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c index a6fe0689f88d..53e69a669668 100644 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -18,7 +18,7 @@ void write_read_with_huge_count(char *file) { int filedesc = open(file, O_RDWR); - char buf[25]; + char buf[256]; int ret; printf("%s %s\n", __func__, file); @@ -28,9 +28,7 @@ void write_read_with_huge_count(char *file) } write(filedesc, "", 0xfffffffful); - perror("after write: "); ret = read(filedesc, buf, 0xfffffffful); - perror("after read: "); close(filedesc); } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 0fec8f9801ad..137470bdee0c 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,15 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 TEST_INCLUDES := $(wildcard lib/py/*.py) \ + $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ TEST_PROGS := \ netcons_basic.sh \ + netcons_overflow.sh \ ping.py \ queues.py \ stats.py \ shaper.py \ + hds.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 03a089165d3f..2b10854e4b1e 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -10,7 +10,7 @@ TEST_PROGS := \ mode-2-recovery-updelay.sh \ bond_options.sh \ bond-eth-type-change.sh \ - bond_macvlan.sh + bond_macvlan_ipvlan.sh TEST_FILES := \ lag_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh deleted file mode 100755 index b609fb6231f4..000000000000 --- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test macvlan over balance-alb - -lib_dir=$(dirname "$0") -source ${lib_dir}/bond_topo_2d1c.sh - -m1_ns="m1-$(mktemp -u XXXXXX)" -m2_ns="m1-$(mktemp -u XXXXXX)" -m1_ip4="192.0.2.11" -m1_ip6="2001:db8::11" -m2_ip4="192.0.2.12" -m2_ip6="2001:db8::12" - -cleanup() -{ - ip -n ${m1_ns} link del macv0 - ip netns del ${m1_ns} - ip -n ${m2_ns} link del macv0 - ip netns del ${m2_ns} - - client_destroy - server_destroy - gateway_destroy -} - -check_connection() -{ - local ns=${1} - local target=${2} - local message=${3:-"macvlan_over_bond"} - RET=0 - - - ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null - check_err $? "ping failed" - log_test "$mode: $message" -} - -macvlan_over_bond() -{ - local param="$1" - RET=0 - - # setup new bond mode - bond_reset "${param}" - - ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge - ip -n ${s_ns} link set macv0 netns ${m1_ns} - ip -n ${m1_ns} link set dev macv0 up - ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0 - ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0 - - ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge - ip -n ${s_ns} link set macv0 netns ${m2_ns} - ip -n ${m2_ns} link set dev macv0 up - ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0 - ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0 - - sleep 2 - - check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server" - check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server" - check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1" - check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1" - check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2" - check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2" - check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2" - check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2" - - - sleep 5 - - check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client" - check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client" - check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client" - check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client" - check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client" - check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client" - check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2" - check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2" - - ip -n ${c_ns} neigh flush dev eth0 -} - -trap cleanup EXIT - -setup_prepare -ip netns add ${m1_ns} -ip netns add ${m2_ns} - -modes="active-backup balance-tlb balance-alb" - -for mode in $modes; do - macvlan_over_bond "mode $mode" -done - -exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh new file mode 100755 index 000000000000..c4711272fe45 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test macvlan/ipvlan over bond + +lib_dir=$(dirname "$0") +source ${lib_dir}/bond_topo_2d1c.sh + +xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)" +xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)" +xvlan1_ip4="192.0.2.11" +xvlan1_ip6="2001:db8::11" +xvlan2_ip4="192.0.2.12" +xvlan2_ip6="2001:db8::12" + +cleanup() +{ + client_destroy + server_destroy + gateway_destroy + + ip netns del ${xvlan1_ns} + ip netns del ${xvlan2_ns} +} + +check_connection() +{ + local ns=${1} + local target=${2} + local message=${3} + RET=0 + + ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null + check_err $? "ping failed" + log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}" +} + +xvlan_over_bond() +{ + local param="$1" + local xvlan_type="$2" + local xvlan_mode="$3" + RET=0 + + # setup new bond mode + bond_reset "${param}" + + ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode} + ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns} + ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up + ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0 + ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0 + + ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode} + ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns} + ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up + ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0 + ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0 + + sleep 2 + + check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server" + check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server" + check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1" + check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1" + check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2" + check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2" + check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2" + check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2" + + check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client" + check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client" + check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client" + check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client" + check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client" + check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client" + check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1" + check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1" + + ip -n ${c_ns} neigh flush dev eth0 +} + +trap cleanup EXIT + +setup_prepare +ip netns add ${xvlan1_ns} +ip netns add ${xvlan2_ns} + +bond_modes="active-backup balance-tlb balance-alb" + +for bond_mode in ${bond_modes}; do + xvlan_over_bond "mode ${bond_mode}" macvlan bridge + xvlan_over_bond "mode ${bond_mode}" ipvlan l2 +done + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 899d7fb6ea8e..dad4e5fda4db 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -3,6 +3,7 @@ CONFIG_BRIDGE=y CONFIG_DUMMY=y CONFIG_IPV6=y CONFIG_MACVLAN=y +CONFIG_IPVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py new file mode 100755 index 000000000000..394971b25c0b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hds.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + +def get_hds(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + +def get_hds_thresh(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + +def set_hds_enable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('enabled', rings['tcp-data-split']) + +def set_hds_disable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('disabled', rings['tcp-data-split']) + +def set_hds_thresh_zero(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + + ksft_eq(0, rings['hds-thresh']) + +def set_hds_thresh_max(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) + +def set_hds_thresh_gt(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + hds_gt = rings['hds-thresh-max'] + 1 + with ksft_raises(NlError) as e: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=3) as cfg: + ksft_run([get_hds, + get_hds_thresh, + set_hds_disable, + set_hds_enable, + set_hds_thresh_zero, + set_hds_thresh_max, + set_hds_thresh_gt], + args=(cfg, EthtoolFamily())) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 8e502a1f8f9b..19a6969643f4 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -619,9 +619,6 @@ int do_server(struct memory_buffer *mem) fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); - fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - cleanup: free(tmp_mem); diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index 05b6fbb3fcdd..ad192fef3117 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -21,9 +21,9 @@ def _enable_pp_allocation_fail(): if not os.path.exists("/sys/kernel/debug/fail_function"): raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") - if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): with open("/sys/kernel/debug/fail_function/inject", "w") as fp: - fp.write("page_pool_alloc_pages\n") + fp.write("page_pool_alloc_netmems\n") _write_fail_config({ "verbose": 0, @@ -37,7 +37,7 @@ def _disable_pp_allocation_fail(): if not os.path.exists("/sys/kernel/debug/fail_function"): return - if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"): with open("/sys/kernel/debug/fail_function/inject", "w") as fp: fp.write("\n") diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 0b49ce7ae678..ca8a7edff3dd 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -3,7 +3,8 @@ import datetime import random -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt +import re +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt, ksft_true from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx, KsftFailEx @@ -96,6 +97,13 @@ def _send_traffic_check(cfg, port, name, params): f"traffic on inactive queues ({name}): " + str(cnts)) +def _ntuple_rule_check(cfg, rule_id, ctx_id): + """Check that ntuple rule references RSS context ID""" + text = ethtool(f"-n {cfg.ifname} rule {rule_id}").stdout + pattern = f"RSS Context (ID: )?{ctx_id}" + ksft_true(re.search(pattern, text), "RSS context not referenced in ntuple rule") + + def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" @@ -459,6 +467,8 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + _ntuple_rule_check(cfg, ntuple, ctx_id) + for i in range(ctx_cnt): _send_traffic_check(cfg, ports[i], f"context {i}", { 'target': (2+i*2, 3+i*2), diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1ea9bb695e94..987e452d3a45 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -5,7 +5,7 @@ import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_setup -from lib.py import cmd, ethtool, ip +from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -48,6 +48,7 @@ class NetDrvEnv: else: self._ns = NetdevSimDev(**kwargs) self.dev = self._ns.nsims[0].dev + self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] def __enter__(self): @@ -234,7 +235,12 @@ class NetDrvEpEnv: Good drivers will tell us via ethtool what their sync period is. """ if self._stats_settle_time is None: - data = ethtool("-c " + self.ifname, json=True)[0] + data = {} + try: + data = ethtool("-c " + self.ifname, json=True)[0] + except CmdExitFailure as e: + if "Operation not supported" not in e.cmd.stderr: + raise self._stats_settle_time = 0.025 + \ data.get('stats-block-usecs', 0) / 1000 / 1000 diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh new file mode 100644 index 000000000000..3acaba41ac7b --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -0,0 +1,225 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This file contains functions and helpers to support the netconsole +# selftests +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +SRCIF="" # to be populated later +SRCIP=192.0.2.1 +DSTIF="" # to be populated later +DSTIP=192.0.2.2 + +PORT="6666" +MSG="netconsole selftest" +USERDATA_KEY="key" +USERDATA_VALUE="value" +TARGET=$(mktemp -u netcons_XXXXX) +DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) +NETCONS_CONFIGFS="/sys/kernel/config/netconsole" +NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +# NAMESPACE will be populated by setup_ns with a random value +NAMESPACE="" + +# IDs for netdevsim +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" + +# Used to create and delete namespaces +source "${LIBDIR}"/../../../../net/lib.sh +source "${LIBDIR}"/../../../../net/net_helper.sh + +# Create netdevsim interfaces +create_ifaces() { + + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" + udevadm settle 2> /dev/null || true + + local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" + local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" + + # These are global variables + SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ + -path "$NSIM1"/net -exec basename {} \;) + DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ + -path "$NSIM2"/net -exec basename {} \;) +} + +link_ifaces() { + local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" + local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) + local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) + + exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" + exec {INITNS_FD}</proc/self/ns/net + + # Bind the dst interface to namespace + ip link set "${DSTIF}" netns "${NAMESPACE}" + + # Linking one device to the other one (on the other namespace} + if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK + then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup + exit "${ksft_skip}" + fi +} + +function configure_ip() { + # Configure the IPs for both interfaces + ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up + + ip addr add "${SRCIP}"/24 dev "${SRCIF}" + ip link set "${SRCIF}" up +} + +function set_network() { + # setup_ns function is coming from lib.sh + setup_ns NAMESPACE + + # Create both interfaces, and assign the destination to a different + # namespace + create_ifaces + + # Link both interfaces back to back + link_ifaces + + configure_ip +} + +function create_dynamic_target() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + + # Create a dynamic target + mkdir "${NETCONS_PATH}" + + echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip + echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip + echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac + echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + + echo 1 > "${NETCONS_PATH}"/enabled +} + +function cleanup() { + local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" + + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove all the keys that got created during the selftest + find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + # Delete netdevsim devices + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" + + # this is coming from lib.sh + cleanup_all_ns + + # Restoring printk configurations + echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk +} + +function set_user_data() { + if [[ ! -d "${NETCONS_PATH}""/userdata" ]] + then + echo "Userdata path not available in ${NETCONS_PATH}/userdata" + exit "${ksft_skip}" + fi + + KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" + mkdir -p "${KEY_PATH}" + VALUE_PATH="${KEY_PATH}""/value" + echo "${USERDATA_VALUE}" > "${VALUE_PATH}" +} + +function listen_port_and_save_to() { + local OUTPUT=${1} + # Just wait for 2 seconds + timeout 2 ip netns exec "${NAMESPACE}" \ + socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" +} + +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + # Check if the file exists + if [ ! -f "$TMPFILENAME" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${TMPFILENAME}"; then + echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + + # Delete the file once it is validated, otherwise keep it + # for debugging purposes + rm "${TMPFILENAME}" + exit "${ksft_pass}" +} + +function check_for_dependencies() { + if [ "$(id -u)" -ne 0 ]; then + echo "This test must be run as root" >&2 + exit "${ksft_skip}" + fi + + if ! which socat > /dev/null ; then + echo "SKIP: socat(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which ip > /dev/null ; then + echo "SKIP: ip(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which udevadm > /dev/null ; then + echo "SKIP: udevadm(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then + echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 + exit "${ksft_skip}" + fi + + if [ ! -d "${NETCONS_CONFIGFS}" ]; then + echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 + exit "${ksft_skip}" + fi + + if ip link show "${DSTIF}" 2> /dev/null; then + echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then + echo "SKIP: IPs already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh index b79542a4dcc7..4a11bf1d514a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh @@ -12,6 +12,7 @@ ALL_TESTS=" bridge_rif_remaster_port " +REQUIRE_TEAMD="yes" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh index e28f978104f3..b8bbe94f4736 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh @@ -10,6 +10,7 @@ ALL_TESTS=" lag_rif_nomaster_addr " +REQUIRE_TEAMD="yes" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh index 6318cfa6434c..d1a9d379eaf3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh @@ -10,6 +10,7 @@ ALL_TESTS=" lag_rif_nomaster_addr " +REQUIRE_TEAMD="yes" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh index 0c47faff9274..c068e6c2a580 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -22,20 +22,34 @@ SB_ITC=0 h1_create() { simple_if_init $h1 192.0.1.1/24 + tc qdisc add dev $h1 clsact + + # Add egress filter on $h1 that will guarantee that the packet sent, + # will be the only packet being passed to the device. + tc filter add dev $h1 egress pref 2 handle 102 matchall action drop } h1_destroy() { + tc filter del dev $h1 egress pref 2 handle 102 matchall action drop + tc qdisc del dev $h1 clsact simple_if_fini $h1 192.0.1.1/24 } h2_create() { simple_if_init $h2 192.0.1.2/24 + tc qdisc add dev $h2 clsact + + # Add egress filter on $h2 that will guarantee that the packet sent, + # will be the only packet being passed to the device. + tc filter add dev $h2 egress pref 1 handle 101 matchall action drop } h2_destroy() { + tc filter del dev $h2 egress pref 1 handle 101 matchall action drop + tc qdisc del dev $h2 clsact simple_if_fini $h2 192.0.1.2/24 } @@ -101,6 +115,11 @@ port_pool_test() local exp_max_occ=$(devlink_cell_size_get) local max_occ + tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass + devlink sb occupancy clearmax $DEVLINK_DEV $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ @@ -109,11 +128,6 @@ port_pool_test() devlink sb occupancy snapshot $DEVLINK_DEV RET=0 - max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ) - check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" - log_test "physical port's($h1) ingress pool" - - RET=0 max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ) check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ" log_test "physical port's($h2) ingress pool" @@ -122,6 +136,11 @@ port_pool_test() max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ) check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ" log_test "CPU port's egress pool" + + tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass } port_tc_ip_test() @@ -129,6 +148,11 @@ port_tc_ip_test() local exp_max_occ=$(devlink_cell_size_get) local max_occ + tc filter add dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass + devlink sb occupancy clearmax $DEVLINK_DEV $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ @@ -139,17 +163,17 @@ port_tc_ip_test() RET=0 max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" - log_test "physical port's($h1) ingress TC - IP packet" - - RET=0 - max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) - check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" log_test "physical port's($h2) ingress TC - IP packet" RET=0 max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ) check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ" log_test "CPU port's egress TC - IP packet" + + tc filter del dev $h1 egress protocol ip pref 1 handle 101 flower \ + src_mac $h1mac dst_mac $h2mac \ + src_ip 192.0.1.1 dst_ip 192.0.1.2 \ + action pass } port_tc_arp_test() @@ -157,6 +181,9 @@ port_tc_arp_test() local exp_max_occ=$(devlink_cell_size_get) local max_occ + tc filter add dev $h1 egress protocol arp pref 1 handle 101 flower \ + src_mac $h1mac action pass + devlink sb occupancy clearmax $DEVLINK_DEV $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q @@ -166,17 +193,15 @@ port_tc_arp_test() RET=0 max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" - log_test "physical port's($h1) ingress TC - ARP packet" - - RET=0 - max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ) - check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ" log_test "physical port's($h2) ingress TC - ARP packet" RET=0 max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ) check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ" log_test "CPU port's egress TC - ARP packet" + + tc filter del dev $h1 egress protocol arp pref 1 handle 101 flower \ + src_mac $h1mac action pass } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index b175f4d966e5..fe765da498e8 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -18,224 +18,8 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -# Simple script to test dynamic targets in netconsole -SRCIF="" # to be populated later -SRCIP=192.0.2.1 -DSTIF="" # to be populated later -DSTIP=192.0.2.2 +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh -PORT="6666" -MSG="netconsole selftest" -USERDATA_KEY="key" -USERDATA_VALUE="value" -TARGET=$(mktemp -u netcons_XXXXX) -DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) -NETCONS_CONFIGFS="/sys/kernel/config/netconsole" -NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" -KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" -# NAMESPACE will be populated by setup_ns with a random value -NAMESPACE="" - -# IDs for netdevsim -NSIM_DEV_1_ID=$((256 + RANDOM % 256)) -NSIM_DEV_2_ID=$((512 + RANDOM % 256)) -NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" - -# Used to create and delete namespaces -source "${SCRIPTDIR}"/../../net/lib.sh -source "${SCRIPTDIR}"/../../net/net_helper.sh - -# Create netdevsim interfaces -create_ifaces() { - - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" - echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" - udevadm settle 2> /dev/null || true - - local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" - local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" - - # These are global variables - SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ - -path "$NSIM1"/net -exec basename {} \;) - DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ - -path "$NSIM2"/net -exec basename {} \;) -} - -link_ifaces() { - local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" - local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) - local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) - - exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" - exec {INITNS_FD}</proc/self/ns/net - - # Bind the dst interface to namespace - ip link set "${DSTIF}" netns "${NAMESPACE}" - - # Linking one device to the other one (on the other namespace} - if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK - then - echo "linking netdevsim1 with netdevsim2 should succeed" - cleanup - exit "${ksft_skip}" - fi -} - -function configure_ip() { - # Configure the IPs for both interfaces - ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" - ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up - - ip addr add "${SRCIP}"/24 dev "${SRCIF}" - ip link set "${SRCIF}" up -} - -function set_network() { - # setup_ns function is coming from lib.sh - setup_ns NAMESPACE - - # Create both interfaces, and assign the destination to a different - # namespace - create_ifaces - - # Link both interfaces back to back - link_ifaces - - configure_ip -} - -function create_dynamic_target() { - DSTMAC=$(ip netns exec "${NAMESPACE}" \ - ip link show "${DSTIF}" | awk '/ether/ {print $2}') - - # Create a dynamic target - mkdir "${NETCONS_PATH}" - - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name - - echo 1 > "${NETCONS_PATH}"/enabled -} - -function cleanup() { - local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" - - # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled - # Remove key - rmdir "${KEY_PATH}" - # Remove the configfs entry - rmdir "${NETCONS_PATH}" - - # Delete netdevsim devices - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" - echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" - - # this is coming from lib.sh - cleanup_all_ns - - # Restoring printk configurations - echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk -} - -function set_user_data() { - if [[ ! -d "${NETCONS_PATH}""/userdata" ]] - then - echo "Userdata path not available in ${NETCONS_PATH}/userdata" - exit "${ksft_skip}" - fi - - mkdir -p "${KEY_PATH}" - VALUE_PATH="${KEY_PATH}""/value" - echo "${USERDATA_VALUE}" > "${VALUE_PATH}" -} - -function listen_port_and_save_to() { - local OUTPUT=${1} - # Just wait for 2 seconds - timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" -} - -function validate_result() { - local TMPFILENAME="$1" - - # TMPFILENAME will contain something like: - # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM - # key=value - - # Check if the file exists - if [ ! -f "$TMPFILENAME" ]; then - echo "FAIL: File was not generated." >&2 - exit "${ksft_fail}" - fi - - if ! grep -q "${MSG}" "${TMPFILENAME}"; then - echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" - fi - - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" - fi - - # Delete the file once it is validated, otherwise keep it - # for debugging purposes - rm "${TMPFILENAME}" - exit "${ksft_pass}" -} - -function check_for_dependencies() { - if [ "$(id -u)" -ne 0 ]; then - echo "This test must be run as root" >&2 - exit "${ksft_skip}" - fi - - if ! which socat > /dev/null ; then - echo "SKIP: socat(1) is not available" >&2 - exit "${ksft_skip}" - fi - - if ! which ip > /dev/null ; then - echo "SKIP: ip(1) is not available" >&2 - exit "${ksft_skip}" - fi - - if ! which udevadm > /dev/null ; then - echo "SKIP: udevadm(1) is not available" >&2 - exit "${ksft_skip}" - fi - - if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then - echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 - exit "${ksft_skip}" - fi - - if [ ! -d "${NETCONS_CONFIGFS}" ]; then - echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 - exit "${ksft_skip}" - fi - - if ip link show "${DSTIF}" 2> /dev/null; then - echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 - exit "${ksft_skip}" - fi - - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 - exit "${ksft_skip}" - fi -} - -# ========== # -# Start here # -# ========== # modprobe netdevsim 2> /dev/null || true modprobe netconsole 2> /dev/null || true diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh new file mode 100755 index 000000000000..29bad56448a2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test verifies that users can successfully create up to +# MAX_USERDATA_ITEMS userdata entries without encountering any failures. +# +# Additionally, it tests for expected failure when attempting to exceed this +# maximum limit. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +# This is coming from netconsole code. Check for it in drivers/net/netconsole.c +MAX_USERDATA_ITEMS=16 + +# Function to create userdata entries +function create_userdata_max_entries() { + # All these keys should be created without any error + for i in $(seq $MAX_USERDATA_ITEMS) + do + # USERDATA_KEY is used by set_user_data + USERDATA_KEY="key"${i} + set_user_data + done +} + +# Function to verify the entry limit +function verify_entry_limit() { + # Allowing the test to fail without exiting, since the next command + # will fail + set +e + mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null + ret="$?" + set -e + if [ "$ret" -eq 0 ]; + then + echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2 + ls "${NETCONS_PATH}/userdata/" >&2 + exit "${ksft_fail}" + fi +} + +# ========== # +# Start here # +# ========== # + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies + +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# populate the maximum number of supported keys in userdata +create_userdata_max_entries +# Verify an additional entry is not allowed +verify_entry_limit +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh index fd13c8cfb7a8..b411fe66510f 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh @@ -58,9 +58,12 @@ for root in mq mqprio; do ethtool -L $NDEV combined 4 n_child_assert 4 "One real queue, rest default" - # Graft some - tcq replace parent 100:1 handle 204: - n_child_assert 3 "Grafted" + # Remove real one + tcq del parent 100:4 handle 204: + + # Replace default with pfifo + tcq replace parent 100:1 handle 205: pfifo limit 1000 + n_child_assert 3 "Deleting real one, replacing default one with pfifo" ethtool -L $NDEV combined 1 n_child_assert 1 "Grafted, one" diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 384cfa3d38a6..92c2f0376c08 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -142,7 +142,7 @@ function pre_ethtool { } function check_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 local -n expected=$2 local last=$3 @@ -212,7 +212,7 @@ function check_tables { } function print_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 read -a have < $path tree $NSIM_DEV_DFS/ @@ -641,7 +641,7 @@ for port in 0 1; do NSIM_NETDEV=`get_netdev_name old_netdevs` ip link set dev $NSIM_NETDEV up - echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error msg="1 - create VxLANs v6" exp0=( 0 0 0 0 ) @@ -663,7 +663,7 @@ for port in 0 1; do new_geneve gnv0 20000 msg="2 - destroy GENEVE" - echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error exp1=( `mke 20000 2` 0 0 0 ) del_dev gnv0 @@ -764,7 +764,7 @@ for port in 0 1; do msg="create VxLANs v4" new_vxlan vxlan0 10000 $NSIM_NETDEV - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes down" @@ -775,7 +775,7 @@ for port in 0 1; do fi check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes up again" @@ -789,7 +789,7 @@ for port in 0 1; do del_dev vxlan0 check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="destroy NIC" @@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table" exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) del_dev vxlan2 -echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="tunnels destroyed 2" diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 30f29096e27c..38303da957ee 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -1,32 +1,37 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 -from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx -from lib.py import EthtoolFamily, NetdevFamily +from lib.py import ksft_disruptive, ksft_exit, ksft_run +from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv -from lib.py import cmd +from lib.py import cmd, defer, ip +import errno import glob -def sys_get_queues(ifname) -> int: - folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*') +def sys_get_queues(ifname, qtype='rx') -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') return len(folders) -def nl_get_queues(cfg, nl): +def nl_get_queues(cfg, nl, qtype='rx'): queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) if queues: - return len([q for q in queues if q['type'] == 'rx']) + return len([q for q in queues if q['type'] == qtype]) return None def get_queues(cfg, nl) -> None: - queues = nl_get_queues(cfg, nl) - if not queues: - raise KsftSkipEx('queue-get not supported by device') + snl = NetdevFamily(recv_size=4096) - expected = sys_get_queues(cfg.dev['ifname']) - ksft_eq(queues, expected) + for qtype in ['rx', 'tx']: + queues = nl_get_queues(cfg, snl, qtype) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname'], qtype) + ksft_eq(queues, expected) def addremove_queues(cfg, nl) -> None: @@ -56,9 +61,27 @@ def addremove_queues(cfg, nl) -> None: ksft_eq(queues, expected) +@ksft_disruptive +def check_down(cfg, nl) -> None: + # Check the NAPI IDs before interface goes down and hides them + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + with ksft_raises(NlError) as cm: + nl.queue_get({'ifindex': cfg.ifindex, 'id': 0, 'type': 'rx'}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + if napis: + with ksft_raises(NlError) as cm: + nl.napi_get({'id': napis[0]['id']}) + ksft_eq(cm.exception.nl_msg.error, -errno.ENOENT) + + def main() -> None: - with NetDrvEnv(__file__, queue_count=3) as cfg: - ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) + with NetDrvEnv(__file__, queue_count=100) as cfg: + ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 63e3c045a3b2..efcc1e10575b 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -2,12 +2,15 @@ # SPDX-License-Identifier: GPL-2.0 import errno +import subprocess +import time from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises +from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv -from lib.py import ip, defer +from lib.py import cmd, ip, defer ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -110,6 +113,23 @@ def qstat_by_ifindex(cfg) -> None: ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + # Sanity check the dumps + queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True) + # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}} + parsed = {} + for entry in queues: + ifindex = entry["ifindex"] + if ifindex not in parsed: + parsed[ifindex] = {"rx":[], "tx": []} + parsed[ifindex][entry["queue-type"]].append(entry['queue-id']) + # Now, validate + for ifindex, queues in parsed.items(): + for qtype in ['rx', 'tx']: + ksft_eq(len(queues[qtype]), len(set(queues[qtype])), + comment="repeated queue keys") + ksft_eq(len(queues[qtype]), max(queues[qtype]) + 1, + comment="missing queue keys") + # Test invalid dumps # 0 is invalid with ksft_raises(NlError) as cm: @@ -157,10 +177,95 @@ def check_down(cfg) -> None: netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) +def __run_inf_loop(body): + body = body.strip() + if body[-1] != ';': + body += ';' + + return subprocess.Popen(f"while true; do {body} done", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + +def __stats_increase_sanely(old, new) -> None: + for k in old.keys(): + ksft_ge(new[k], old[k]) + ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error") + + +def procfs_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, which is not an exclusive + lock, make sure drivers can handle parallel reads of stats. + """ + one = __run_inf_loop("cat /proc/net/dev") + defer(one.kill) + two = __run_inf_loop("cat /proc/net/dev") + defer(two.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(one.poll(), None) + ksft_is(two.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + time.sleep(2) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + # defers will kill the loops + + +@ksft_disruptive +def procfs_downup_hammer(cfg) -> None: + """ + Reading stats via procfs only holds the RCU lock, drivers often try + to sleep when reading the stats, or don't protect against races. + """ + # Max out the queues, we'll flip between max and 1 + channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + cur_queue_cnt = channels[f'{rx_type}-count'] + max_queue_cnt = channels[f'{rx_type}-max'] + + cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}") + defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}") + + # Real test stats + stats = __run_inf_loop("cat /proc/net/dev") + defer(stats.kill) + + ipset = f"ip link set dev {cfg.ifname}" + defer(ip, f"link set dev {cfg.ifname} up") + # The "echo -n 1" lets us count iterations below + updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \ + f"ethtool -L {cfg.ifname} {rx_type} 1; " + \ + f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \ + "echo -n 1" + updown = __run_inf_loop(updown) + kill_updown = defer(updown.kill) + + time.sleep(1) + # Make sure the processes are running + ksft_is(stats.poll(), None) + ksft_is(updown.poll(), None) + + rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + # We're looking for crashes, give it extra time + time.sleep(9) + rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + __stats_increase_sanely(rtstat1, rtstat2) + + kill_updown.exec() + stdout, _ = updown.communicate(timeout=5) + ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8'))) + + def main() -> None: - with NetDrvEnv(__file__) as cfg: + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, - check_down], + check_down, procfs_hammer, procfs_downup_hammer], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 071e03532cba..8fb7395fd35b 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -23,9 +23,11 @@ #include "../kselftest.h" -#define TESTS_EXPECTED 51 +#define TESTS_EXPECTED 54 #define TEST_NAME_LEN (PATH_MAX * 4) +#define CHECK_COMM "CHECK_COMM" + static char longpath[2 * PATH_MAX] = ""; static char *envp[] = { "IN_TEST=yes", NULL, NULL }; static char *argv[] = { "execveat", "99", NULL }; @@ -237,6 +239,29 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) return fail; } +static int check_execveat_comm(int fd, char *argv0, char *expected) +{ + char buf[128], *old_env, *old_argv0; + int ret; + + snprintf(buf, sizeof(buf), CHECK_COMM "=%s", expected); + + old_env = envp[1]; + envp[1] = buf; + + old_argv0 = argv[0]; + argv[0] = argv0; + + ksft_print_msg("Check execveat(AT_EMPTY_PATH)'s comm is %s\n", + expected); + ret = check_execveat_invoked_rc(fd, "", AT_EMPTY_PATH, 0, 0); + + envp[1] = old_env; + argv[0] = old_argv0; + + return ret; +} + static int run_tests(void) { int fail = 0; @@ -389,6 +414,14 @@ static int run_tests(void) fail += check_execveat_pathmax(root_dfd, "execveat", 0); fail += check_execveat_pathmax(root_dfd, "script", 1); + + /* /proc/pid/comm gives filename by default */ + fail += check_execveat_comm(fd, "sentinel", "execveat"); + /* /proc/pid/comm gives argv[0] when invoked via link */ + fail += check_execveat_comm(fd_symlink, "sentinel", "execveat"); + /* /proc/pid/comm gives filename if NULL is passed */ + fail += check_execveat_comm(fd, NULL, "execveat"); + return fail; } @@ -415,9 +448,13 @@ int main(int argc, char **argv) int ii; int rc; const char *verbose = getenv("VERBOSE"); + const char *check_comm = getenv(CHECK_COMM); - if (argc >= 2) { - /* If we are invoked with an argument, don't run tests. */ + if (argc >= 2 || check_comm) { + /* + * If we are invoked with an argument, or no arguments but a + * command to check, don't run tests. + */ const char *in_test = getenv("IN_TEST"); if (verbose) { @@ -426,6 +463,38 @@ int main(int argc, char **argv) ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]); } + /* If the tests wanted us to check the command, do so. */ + if (check_comm) { + /* TASK_COMM_LEN == 16 */ + char buf[32]; + int fd, ret; + + fd = open("/proc/self/comm", O_RDONLY); + if (fd < 0) { + ksft_perror("open() comm failed"); + exit(1); + } + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + ksft_perror("read() comm failed"); + close(fd); + exit(1); + } + close(fd); + + // trim off the \n + buf[ret-1] = 0; + + if (strcmp(buf, check_comm)) { + ksft_print_msg("bad comm, got: %s expected: %s\n", + buf, check_comm); + exit(1); + } + + exit(0); + } + /* Check expected environment transferred. */ if (!in_test || strcmp(in_test, "yes") != 0) { ksft_print_msg("no IN_TEST=yes in env\n"); diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 319567f0fae1..81db85a5cc16 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -57,7 +57,6 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) { int fd, ret, saved_errno, result = 1; size_t len; - ssize_t wret; struct binderfs_device device = { 0 }; struct binder_version version = { 0 }; char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/filesystems/nsfs/.gitignore index ed79ebdf286e..92a8249006d1 100644 --- a/tools/testing/selftests/nsfs/.gitignore +++ b/tools/testing/selftests/filesystems/nsfs/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only owner pidns +iterate_mntns diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/filesystems/nsfs/Makefile index dd9bd50b7b93..231aaa7dfd95 100644 --- a/tools/testing/selftests/nsfs/Makefile +++ b/tools/testing/selftests/filesystems/nsfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -TEST_GEN_PROGS := owner pidns +TEST_GEN_PROGS := owner pidns iterate_mntns CFLAGS := -Wall -Werror -include ../lib.mk +include ../../lib.mk diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/filesystems/nsfs/config index 598d0a225fc9..598d0a225fc9 100644 --- a/tools/testing/selftests/nsfs/config +++ b/tools/testing/selftests/filesystems/nsfs/config diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c new file mode 100644 index 000000000000..457cf76f3c5f --- /dev/null +++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" + +#define MNT_NS_COUNT 11 +#define MNT_NS_LAST_INDEX 10 + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info) + +FIXTURE(iterate_mount_namespaces) { + int fd_mnt_ns[MNT_NS_COUNT]; + __u64 mnt_ns_id[MNT_NS_COUNT]; +}; + +FIXTURE_SETUP(iterate_mount_namespaces) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) + self->fd_mnt_ns[i] = -EBADF; + + /* + * Creating a new user namespace let's us guarantee that we only see + * mount namespaces that we did actually create. + */ + ASSERT_EQ(unshare(CLONE_NEWUSER), 0); + + for (int i = 0; i < MNT_NS_COUNT; i++) { + struct mnt_ns_info info = {}; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + self->fd_mnt_ns[i] = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC); + ASSERT_GE(self->fd_mnt_ns[i], 0); + ASSERT_EQ(ioctl(self->fd_mnt_ns[i], NS_MNT_GET_INFO, &info), 0); + self->mnt_ns_id[i] = info.mnt_ns_id; + } +} + +FIXTURE_TEARDOWN(iterate_mount_namespaces) +{ + for (int i = 0; i < MNT_NS_COUNT; i++) { + if (self->fd_mnt_ns[i] < 0) + continue; + ASSERT_EQ(close(self->fd_mnt_ns[i]), 0); + } +} + +TEST_F(iterate_mount_namespaces, iterate_all_forward) +{ + int fd_mnt_ns_cur, count = 0; + + fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC); + ASSERT_GE(fd_mnt_ns_cur, 0); + + for (;; count++) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_next; + + fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); + if (fd_mnt_ns_next < 0 && errno == ENOENT) + break; + ASSERT_GE(fd_mnt_ns_next, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_next; + } + ASSERT_EQ(count, MNT_NS_LAST_INDEX); +} + +TEST_F(iterate_mount_namespaces, iterate_all_backwards) +{ + int fd_mnt_ns_cur, count = 0; + + fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC); + ASSERT_GE(fd_mnt_ns_cur, 0); + + for (;; count++) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_prev; + + fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); + if (fd_mnt_ns_prev < 0 && errno == ENOENT) + break; + ASSERT_GE(fd_mnt_ns_prev, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_prev; + } + ASSERT_EQ(count, MNT_NS_LAST_INDEX); +} + +TEST_F(iterate_mount_namespaces, iterate_forward) +{ + int fd_mnt_ns_cur; + + ASSERT_EQ(setns(self->fd_mnt_ns[0], CLONE_NEWNS), 0); + + fd_mnt_ns_cur = self->fd_mnt_ns[0]; + for (int i = 1; i < MNT_NS_COUNT; i++) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_next; + + fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info); + ASSERT_GE(fd_mnt_ns_next, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_next; + ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); + } +} + +TEST_F(iterate_mount_namespaces, iterate_backward) +{ + int fd_mnt_ns_cur; + + ASSERT_EQ(setns(self->fd_mnt_ns[MNT_NS_LAST_INDEX], CLONE_NEWNS), 0); + + fd_mnt_ns_cur = self->fd_mnt_ns[MNT_NS_LAST_INDEX]; + for (int i = MNT_NS_LAST_INDEX - 1; i >= 0; i--) { + struct mnt_ns_info info = {}; + int fd_mnt_ns_prev; + + fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info); + ASSERT_GE(fd_mnt_ns_prev, 0); + ASSERT_EQ(close(fd_mnt_ns_cur), 0); + fd_mnt_ns_cur = fd_mnt_ns_prev; + ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/filesystems/nsfs/owner.c index 96a976c74550..96a976c74550 100644 --- a/tools/testing/selftests/nsfs/owner.c +++ b/tools/testing/selftests/filesystems/nsfs/owner.c diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/filesystems/nsfs/pidns.c index e3c772c6a7c7..e3c772c6a7c7 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/filesystems/nsfs/pidns.c diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 3af3136e35a4..14ee91a41650 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := statmount_test statmount_test_ns +TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c new file mode 100644 index 000000000000..15f0834f7557 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "statmount.h" +#include "../../kselftest_harness.h" + +#ifndef LISTMOUNT_REVERSE +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ +#endif + +#define LISTMNT_BUFFER 10 + +/* Check that all mount ids are in increasing order. */ +TEST(listmount_forward) +{ + uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0; + + for (;;) { + ssize_t nr_mounts; + + nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id, + list, LISTMNT_BUFFER, 0); + ASSERT_GE(nr_mounts, 0); + if (nr_mounts == 0) + break; + + for (size_t cur = 0; cur < nr_mounts; cur++) { + if (cur < nr_mounts - 1) + ASSERT_LT(list[cur], list[cur + 1]); + last_mnt_id = list[cur]; + } + } +} + +/* Check that all mount ids are in decreasing order. */ +TEST(listmount_backward) +{ + uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0; + + for (;;) { + ssize_t nr_mounts; + + nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id, + list, LISTMNT_BUFFER, LISTMOUNT_REVERSE); + ASSERT_GE(nr_mounts, 0); + if (nr_mounts == 0) + break; + + for (size_t cur = 0; cur < nr_mounts; cur++) { + if (cur < nr_mounts - 1) + ASSERT_GT(list[cur], list[cur + 1]); + last_mnt_id = list[cur]; + } + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index c773334bbcc9..8eb6aa606a0d 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -27,7 +27,7 @@ static const char *const known_fs[] = { "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc new file mode 100644 index 000000000000..b4ad09237e2a --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - Repeating add/remove fprobe events +# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README + +echo 0 > events/enable +echo > dynamic_events + +PLACE=$FUNCTION_FORK +REPEAT_TIMES=64 + +for i in `seq 1 $REPEAT_TIMES`; do + echo "f:myevent $PLACE" >> dynamic_events + grep -q myevent dynamic_events + test -d events/fprobes/myevent + echo > dynamic_events +done + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc index 61877d166451..c9425a34fae3 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -16,9 +16,7 @@ aarch64) REG=%r0 ;; esac -check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE -check_error 'f^1a111 vfs_read' # BAD_MAXACT -check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG +check_error 'f^100 vfs_read' # BAD_MAXACT check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent) check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index a16c6a6f6055..8f1c58f0c239 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -111,7 +111,7 @@ check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS if !grep -q 'kernel return probes support:' README; then check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS fi -check_error 'p vfs_read+8 ^$arg*' # NOFENTRY_ARGS +check_error 'p vfs_read+20 ^$arg*' # NOFENTRY_ARGS check_error 'p vfs_read ^hoge' # NO_BTFARG check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) check_error 'r kfree ^$retval' # NO_RETVAL diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index ca58bfa3ca65..1e979fb3542b 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -54,41 +54,11 @@ FIXTURE_TEARDOWN(hid_bpf) { hid_bpf_teardown(_metadata, self, variant); \ } while (0) -struct specific_device { - const char test_name[64]; - __u16 bus; - __u32 vid; - __u32 pid; -}; - FIXTURE_SETUP(hid_bpf) { - const struct specific_device *match = NULL; int err; - const struct specific_device devices[] = { - { - .test_name = "test_hid_driver_probe", - .bus = BUS_BLUETOOTH, - .vid = 0x05ac, /* USB_VENDOR_ID_APPLE */ - .pid = 0x022c, /* USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI */ - }, { - .test_name = "*", - .bus = BUS_USB, - .vid = 0x0001, - .pid = 0x0a36, - }}; - - for (int i = 0; i < ARRAY_SIZE(devices); i++) { - match = &devices[i]; - if (!strncmp(_metadata->name, devices[i].test_name, sizeof(devices[i].test_name))) - break; - } - - ASSERT_OK_PTR(match); - - err = setup_uhid(_metadata, &self->hid, match->bus, match->vid, match->pid, - rdesc, sizeof(rdesc)); + err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a36, rdesc, sizeof(rdesc)); ASSERT_OK(err); } @@ -885,54 +855,6 @@ TEST_F(hid_bpf, test_hid_attach_flags) ASSERT_EQ(buf[3], 3); } -static bool is_using_driver(struct __test_metadata *_metadata, struct uhid_device *hid, - const char *driver) -{ - char driver_line[512]; - char uevent[1024]; - char temp[512]; - int fd, nread; - bool found = false; - - sprintf(uevent, "/sys/bus/hid/devices/%04X:%04X:%04X.%04X/uevent", - hid->bus, hid->vid, hid->pid, hid->hid_id); - - fd = open(uevent, O_RDONLY | O_NONBLOCK); - if (fd < 0) { - TH_LOG("couldn't open '%s': %d, %d", uevent, fd, errno); - return false; - } - - sprintf(driver_line, "DRIVER=%s", driver); - - nread = read(fd, temp, ARRAY_SIZE(temp)); - if (nread > 0 && (strstr(temp, driver_line)) != NULL) - found = true; - - close(fd); - - return found; -} - -/* - * Attach hid_driver_probe to the given uhid device, - * check that the device is now using hid-generic. - */ -TEST_F(hid_bpf, test_hid_driver_probe) -{ - const struct test_program progs[] = { - { - .name = "hid_test_driver_probe", - }, - }; - - ASSERT_TRUE(is_using_driver(_metadata, &self->hid, "apple")); - - LOAD_PROGRAMS(progs); - - ASSERT_TRUE(is_using_driver(_metadata, &self->hid, "hid-generic")); -} - /* * Attach hid_rdesc_fixup to the given uhid device, * retrieve and open the matching hidraw node, diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index 9b22e9a0e658..5ecc845ef792 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -598,15 +598,3 @@ SEC(".struct_ops.link") struct hid_bpf_ops test_infinite_loop_input_report = { .hid_device_event = (void *)hid_test_infinite_loop_input_report, }; - -SEC("?struct_ops.s/hid_rdesc_fixup") -int BPF_PROG(hid_test_driver_probe, struct hid_bpf_ctx *hid_ctx) -{ - hid_ctx->hid->quirks |= HID_QUIRK_IGNORE_SPECIAL_DRIVER; - return 0; -} - -SEC(".struct_ops.link") -struct hid_bpf_ops test_driver_probe = { - .hid_rdesc_fixup = (void *)hid_test_driver_probe, -}; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 1a645684a117..531228b849da 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -22,6 +22,9 @@ #define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used #define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used +/* do not define kfunc through vmlinux.h as this messes up our custom hack */ +#define BPF_NO_KFUNC_PROTOTYPES + #include "vmlinux.h" #undef hid_bpf_ctx @@ -84,42 +87,38 @@ struct hid_bpf_ops { struct hid_device *hdev; }; -#define BIT(n) (1U << n) - #ifndef BPF_F_BEFORE -#define BPF_F_BEFORE BIT(3) +#define BPF_F_BEFORE (1U << 3) #endif -#define HID_QUIRK_IGNORE_SPECIAL_DRIVER BIT(22) - /* following are kfuncs exported by HID for HID-BPF */ extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, - const size_t __sz) __ksym; -extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; -extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; + const size_t __sz) __weak __ksym; +extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __weak __ksym; +extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __weak __ksym; extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *data, size_t buf__sz, enum hid_report_type type, - enum hid_class_request reqtype) __ksym; + enum hid_class_request reqtype) __weak __ksym; extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, - __u8 *buf, size_t buf__sz) __ksym; + __u8 *buf, size_t buf__sz) __weak __ksym; extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, - size_t buf__sz) __ksym; + size_t buf__sz) __weak __ksym; extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, - size_t buf__sz) __ksym; + size_t buf__sz) __weak __ksym; /* bpf_wq implementation */ extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, int (callback_fn)(void *map, int *key, void *wq), - unsigned int flags__k, void *aux__ign) __ksym; + unsigned int flags__k, void *aux__ign) __weak __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/hid/run-hid-tools-tests.sh b/tools/testing/selftests/hid/run-hid-tools-tests.sh index bdae8464da86..af1682a53c27 100755 --- a/tools/testing/selftests/hid/run-hid-tools-tests.sh +++ b/tools/testing/selftests/hid/run-hid-tools-tests.sh @@ -2,24 +2,26 @@ # SPDX-License-Identifier: GPL-2.0 # Runs tests for the HID subsystem +KSELFTEST_SKIP_TEST=4 + if ! command -v python3 > /dev/null 2>&1; then echo "hid-tools: [SKIP] python3 not installed" - exit 77 + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import pytest" > /dev/null 2>&1; then - echo "hid: [SKIP/ pytest module not installed" - exit 77 + echo "hid: [SKIP] pytest module not installed" + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import pytest_tap" > /dev/null 2>&1; then - echo "hid: [SKIP/ pytest_tap module not installed" - exit 77 + echo "hid: [SKIP] pytest_tap module not installed" + exit $KSELFTEST_SKIP_TEST fi if ! python3 -c "import hidtools" > /dev/null 2>&1; then - echo "hid: [SKIP/ hid-tools module not installed" - exit 77 + echo "hid: [SKIP] hid-tools module not installed" + exit $KSELFTEST_SKIP_TEST fi TARGET=${TARGET:=.} diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile index fd6477911f24..84abeb2f0949 100644 --- a/tools/testing/selftests/iommu/Makefile +++ b/tools/testing/selftests/iommu/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += $(KHDR_INCLUDES) +LDLIBS += -lcap TEST_GEN_PROGS := TEST_GEN_PROGS += iommufd diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 4927b9add5ad..a1b2b657999d 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include <asm/unistd.h> #include <stdlib.h> +#include <sys/capability.h> #include <sys/mman.h> #include <sys/eventfd.h> @@ -49,6 +51,9 @@ static __attribute__((constructor)) void setup_sizes(void) vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); assert(vrc == buffer); + + mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + &mfd); } FIXTURE(iommufd) @@ -128,6 +133,11 @@ TEST_F(iommufd, cmd_length) TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length); TEST_LENGTH(iommu_option, IOMMU_OPTION, val64); TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved); + TEST_LENGTH(iommu_ioas_map_file, IOMMU_IOAS_MAP_FILE, iova); + TEST_LENGTH(iommu_viommu_alloc, IOMMU_VIOMMU_ALLOC, out_viommu_id); + TEST_LENGTH(iommu_vdevice_alloc, IOMMU_VDEVICE_ALLOC, virt_id); + TEST_LENGTH(iommu_ioas_change_process, IOMMU_IOAS_CHANGE_PROCESS, + __reserved); #undef TEST_LENGTH } @@ -186,6 +196,144 @@ TEST_F(iommufd, global_options) EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); } +static void drop_cap_ipc_lock(struct __test_metadata *_metadata) +{ + cap_t caps; + cap_value_t cap_list[1] = { CAP_IPC_LOCK }; + + caps = cap_get_proc(); + ASSERT_NE(caps, NULL); + ASSERT_NE(-1, + cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); + ASSERT_NE(-1, cap_set_proc(caps)); + cap_free(caps); +} + +static long get_proc_status_value(pid_t pid, const char *var) +{ + FILE *fp; + char buf[80], tag[80]; + long val = -1; + + snprintf(buf, sizeof(buf), "/proc/%d/status", pid); + fp = fopen(buf, "r"); + if (!fp) + return val; + + while (fgets(buf, sizeof(buf), fp)) + if (fscanf(fp, "%s %ld\n", tag, &val) == 2 && !strcmp(tag, var)) + break; + + fclose(fp); + return val; +} + +static long get_vm_pinned(pid_t pid) +{ + return get_proc_status_value(pid, "VmPin:"); +} + +static long get_vm_locked(pid_t pid) +{ + return get_proc_status_value(pid, "VmLck:"); +} + +FIXTURE(change_process) +{ + int fd; + uint32_t ioas_id; +}; + +FIXTURE_VARIANT(change_process) +{ + int accounting; +}; + +FIXTURE_SETUP(change_process) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + drop_cap_ipc_lock(_metadata); + if (variant->accounting != IOPT_PAGES_ACCOUNT_NONE) { + struct iommu_option set_limit_cmd = { + .size = sizeof(set_limit_cmd), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .val64 = (variant->accounting == IOPT_PAGES_ACCOUNT_MM), + }; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &set_limit_cmd)); + } + + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); +} + +FIXTURE_TEARDOWN(change_process) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(change_process, account_none) +{ + .accounting = IOPT_PAGES_ACCOUNT_NONE, +}; + +FIXTURE_VARIANT_ADD(change_process, account_user) +{ + .accounting = IOPT_PAGES_ACCOUNT_USER, +}; + +FIXTURE_VARIANT_ADD(change_process, account_mm) +{ + .accounting = IOPT_PAGES_ACCOUNT_MM, +}; + +TEST_F(change_process, basic) +{ + pid_t parent = getpid(); + pid_t child; + __u64 iova; + struct iommu_ioas_change_process cmd = { + .size = sizeof(cmd), + }; + + /* Expect failure if non-file maps exist */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + test_ioctl_ioas_unmap(iova, PAGE_SIZE); + + /* Change process works in current process. */ + test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + + /* Change process works in another process */ + child = fork(); + if (!child) { + int nlock = PAGE_SIZE / 1024; + + /* Parent accounts for locked memory before */ + ASSERT_EQ(nlock, get_vm_pinned(parent)); + if (variant->accounting == IOPT_PAGES_ACCOUNT_MM) + ASSERT_EQ(nlock, get_vm_locked(parent)); + ASSERT_EQ(0, get_vm_pinned(getpid())); + ASSERT_EQ(0, get_vm_locked(getpid())); + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + + /* Child accounts for locked memory after */ + ASSERT_EQ(0, get_vm_pinned(parent)); + ASSERT_EQ(0, get_vm_locked(parent)); + ASSERT_EQ(nlock, get_vm_pinned(getpid())); + if (variant->accounting == IOPT_PAGES_ACCOUNT_MM) + ASSERT_EQ(nlock, get_vm_locked(getpid())); + + exit(0); + } + ASSERT_NE(-1, child); + ASSERT_EQ(child, waitpid(child, NULL, 0)); +} + FIXTURE(iommufd_ioas) { int fd; @@ -220,6 +368,8 @@ FIXTURE_SETUP(iommufd_ioas) for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, &self->device_id); + test_cmd_dev_check_cache_all(self->device_id, + IOMMU_TEST_DEV_CACHE_DEFAULT); self->base_iova = MOCK_APERTURE_START; } } @@ -360,9 +510,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, parent_hwpt_id)); - /* hwpt_invalidate only supports a user-managed hwpt (nested) */ + /* hwpt_invalidate does not support a parent hwpt */ num_inv = 1; - test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs, + test_err_hwpt_invalidate(EINVAL, parent_hwpt_id, inv_reqs, IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, sizeof(*inv_reqs), &num_inv); assert(!num_inv); @@ -1372,6 +1522,7 @@ FIXTURE_VARIANT(iommufd_mock_domain) { unsigned int mock_domains; bool hugepages; + bool file; }; FIXTURE_SETUP(iommufd_mock_domain) @@ -1384,9 +1535,12 @@ FIXTURE_SETUP(iommufd_mock_domain) ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains); - for (i = 0; i != variant->mock_domains; i++) + for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i], &self->hwpt_ids[i], &self->idev_ids[i]); + test_cmd_dev_check_cache_all(self->idev_ids[0], + IOMMU_TEST_DEV_CACHE_DEFAULT); + } self->hwpt_id = self->hwpt_ids[0]; self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; @@ -1410,26 +1564,45 @@ FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain) { .mock_domains = 1, .hugepages = false, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains) { .mock_domains = 2, .hugepages = false, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage) { .mock_domains = 1, .hugepages = true, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) { .mock_domains = 2, .hugepages = true, + .file = false, }; +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file) +{ + .mock_domains = 1, + .hugepages = false, + .file = true, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file_hugepage) +{ + .mock_domains = 1, + .hugepages = true, + .file = true, +}; + + /* Have the kernel check that the user pages made it to the iommu_domain */ #define check_mock_iova(_ptr, _iova, _length) \ ({ \ @@ -1455,7 +1628,10 @@ FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) } \ }) -TEST_F(iommufd_mock_domain, basic) +static void +test_basic_mmap(struct __test_metadata *_metadata, + struct _test_data_iommufd_mock_domain *self, + const struct _fixture_variant_iommufd_mock_domain *variant) { size_t buf_size = self->mmap_buf_size; uint8_t *buf; @@ -1478,6 +1654,40 @@ TEST_F(iommufd_mock_domain, basic) test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); } +static void +test_basic_file(struct __test_metadata *_metadata, + struct _test_data_iommufd_mock_domain *self, + const struct _fixture_variant_iommufd_mock_domain *variant) +{ + size_t buf_size = self->mmap_buf_size; + uint8_t *buf; + __u64 iova; + int mfd_tmp; + int prot = PROT_READ | PROT_WRITE; + + /* Simple one page map */ + test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova); + check_mock_iova(mfd_buffer, iova, PAGE_SIZE); + + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd_tmp); + ASSERT_NE(MAP_FAILED, buf); + + test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size + 1, &iova); + + ASSERT_EQ(0, ftruncate(mfd_tmp, 0)); + test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size, &iova); + + close(mfd_tmp); +} + +TEST_F(iommufd_mock_domain, basic) +{ + if (variant->file) + test_basic_file(_metadata, self, variant); + else + test_basic_mmap(_metadata, self, variant); +} + TEST_F(iommufd_mock_domain, ro_unshare) { uint8_t *buf; @@ -1513,9 +1723,13 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int start; unsigned int end; uint8_t *buf; + int prot = PROT_READ | PROT_WRITE; + int mfd; - buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, - 0); + if (variant->file) + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); + else + buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); check_refs(buf, buf_size, 0); @@ -1532,7 +1746,12 @@ TEST_F(iommufd_mock_domain, all_aligns) size_t length = end - start; __u64 iova; - test_ioctl_ioas_map(buf + start, length, &iova); + if (variant->file) { + test_ioctl_ioas_map_file(mfd, start, length, + &iova); + } else { + test_ioctl_ioas_map(buf + start, length, &iova); + } check_mock_iova(buf + start, iova, length); check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, end / PAGE_SIZE * PAGE_SIZE - @@ -1544,6 +1763,8 @@ TEST_F(iommufd_mock_domain, all_aligns) } check_refs(buf, buf_size, 0); ASSERT_EQ(0, munmap(buf, buf_size)); + if (variant->file) + close(mfd); } TEST_F(iommufd_mock_domain, all_aligns_copy) @@ -1554,9 +1775,13 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int start; unsigned int end; uint8_t *buf; + int prot = PROT_READ | PROT_WRITE; + int mfd; - buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, - 0); + if (variant->file) + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); + else + buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); check_refs(buf, buf_size, 0); @@ -1575,7 +1800,12 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) uint32_t mock_stdev_id; __u64 iova; - test_ioctl_ioas_map(buf + start, length, &iova); + if (variant->file) { + test_ioctl_ioas_map_file(mfd, start, length, + &iova); + } else { + test_ioctl_ioas_map(buf + start, length, &iova); + } /* Add and destroy a domain while the area exists */ old_id = self->hwpt_ids[1]; @@ -1596,15 +1826,18 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) } check_refs(buf, buf_size, 0); ASSERT_EQ(0, munmap(buf, buf_size)); + if (variant->file) + close(mfd); } TEST_F(iommufd_mock_domain, user_copy) { + void *buf = variant->file ? mfd_buffer : buffer; struct iommu_test_cmd access_cmd = { .size = sizeof(access_cmd), .op = IOMMU_TEST_OP_ACCESS_PAGES, .access_pages = { .length = BUFFER_SIZE, - .uptr = (uintptr_t)buffer }, + .uptr = (uintptr_t)buf }, }; struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), @@ -1623,9 +1856,13 @@ TEST_F(iommufd_mock_domain, user_copy) /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ test_ioctl_ioas_alloc(&ioas_id); - test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE, - ©_cmd.src_iova); - + if (variant->file) { + test_ioctl_ioas_map_id_file(ioas_id, mfd, 0, BUFFER_SIZE, + ©_cmd.src_iova); + } else { + test_ioctl_ioas_map_id(ioas_id, buf, BUFFER_SIZE, + ©_cmd.src_iova); + } test_cmd_create_access(ioas_id, &access_cmd.id, MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); @@ -1635,12 +1872,17 @@ TEST_F(iommufd_mock_domain, user_copy) &access_cmd)); copy_cmd.src_ioas_id = ioas_id; ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); - check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE); /* Now replace the ioas with a new one */ test_ioctl_ioas_alloc(&new_ioas_id); - test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE, - ©_cmd.src_iova); + if (variant->file) { + test_ioctl_ioas_map_id_file(new_ioas_id, mfd, 0, BUFFER_SIZE, + ©_cmd.src_iova); + } else { + test_ioctl_ioas_map_id(new_ioas_id, buf, BUFFER_SIZE, + ©_cmd.src_iova); + } test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id); /* Destroy the old ioas and cleanup copied mapping */ @@ -1654,7 +1896,7 @@ TEST_F(iommufd_mock_domain, user_copy) &access_cmd)); copy_cmd.src_ioas_id = new_ioas_id; ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); - check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE); test_cmd_destroy_access_pages( access_cmd.id, access_cmd.access_pages.out_access_pages_id); @@ -2386,4 +2628,332 @@ TEST_F(vfio_compat_mock_domain, huge_map) } } +FIXTURE(iommufd_viommu) +{ + int fd; + uint32_t ioas_id; + uint32_t stdev_id; + uint32_t hwpt_id; + uint32_t nested_hwpt_id; + uint32_t device_id; + uint32_t viommu_id; +}; + +FIXTURE_VARIANT(iommufd_viommu) +{ + unsigned int viommu; +}; + +FIXTURE_SETUP(iommufd_viommu) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + test_ioctl_set_default_memory_limit(); + + if (variant->viommu) { + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, NULL, + &self->device_id); + + /* Allocate a nesting parent hwpt */ + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + &self->hwpt_id); + + /* Allocate a vIOMMU taking refcount of the parent hwpt */ + test_cmd_viommu_alloc(self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, + &self->viommu_id); + + /* Allocate a regular nested hwpt */ + test_cmd_hwpt_alloc_nested(self->device_id, self->viommu_id, 0, + &self->nested_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + } +} + +FIXTURE_TEARDOWN(iommufd_viommu) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_viommu, no_viommu) +{ + .viommu = 0, +}; + +FIXTURE_VARIANT_ADD(iommufd_viommu, mock_viommu) +{ + .viommu = 1, +}; + +TEST_F(iommufd_viommu, viommu_auto_destroy) +{ +} + +TEST_F(iommufd_viommu, viommu_negative_tests) +{ + uint32_t device_id = self->device_id; + uint32_t ioas_id = self->ioas_id; + uint32_t hwpt_id; + + if (self->device_id) { + /* Negative test -- invalid hwpt (hwpt_id=0) */ + test_err_viommu_alloc(ENOENT, device_id, 0, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + + /* Negative test -- not a nesting parent hwpt */ + test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id); + test_err_viommu_alloc(EINVAL, device_id, hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + test_ioctl_destroy(hwpt_id); + + /* Negative test -- unsupported viommu type */ + test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id, + 0xdead, NULL); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->hwpt_id)); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->viommu_id)); + } else { + test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + } +} + +TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) +{ + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t iopf_hwpt_id; + uint32_t fault_id; + uint32_t fault_fd; + + if (self->device_id) { + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_err_hwpt_alloc_iopf( + ENOENT, dev_id, viommu_id, UINT32_MAX, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf( + EOPNOTSUPP, dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_cmd_hwpt_alloc_iopf( + dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID, + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + test_cmd_trigger_iopf(dev_id, fault_fd); + + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); + } +} + +TEST_F(iommufd_viommu, vdevice_alloc) +{ + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + + if (dev_id) { + /* Set vdev_id to 0x99, unset it, and set to 0x88 */ + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99, + &vdev_id); + test_ioctl_destroy(vdev_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id); + test_ioctl_destroy(vdev_id); + } else { + test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL); + } +} + +TEST_F(iommufd_viommu, vdevice_cache) +{ + struct iommu_viommu_invalidate_selftest inv_reqs[2] = {}; + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + uint32_t num_inv; + + if (dev_id) { + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + + test_cmd_dev_check_cache_all(dev_id, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + 1, &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, NULL, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + 0, &num_inv); + assert(!num_inv); + + /* Negative test: invalid cache_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid vdev_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x9; + inv_reqs[0].cache_id = 0; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 1; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid cache_id configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 2nd cache entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 1; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, 0); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 3rd and 4th cache entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 3; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 2); + test_cmd_dev_check_cache_all(dev_id, 0); + + /* Invalidate all cache entries for nested_dev_id[1] and verify */ + num_inv = 1; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache_all(dev_id, 0); + test_ioctl_destroy(vdev_id); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index c5d5e69452b0..64b1f8e1b0cf 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -47,6 +47,9 @@ static __attribute__((constructor)) void setup_buffer(void) buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + &mfd); } /* @@ -331,6 +334,42 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) return 0; } +/* iopt_area_fill_domains() and iopt_area_fill_domain() */ +TEST_FAIL_NTH(basic_fail_nth, map_file_domain) +{ + uint32_t ioas_id; + __u32 stdev_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) + return -1; + + if (_test_ioctl_ioas_map_file(self->fd, ioas_id, mfd, 0, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, stdev_id)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) + return -1; + return 0; +} + TEST_FAIL_NTH(basic_fail_nth, map_two_domains) { uint32_t ioas_id; @@ -576,12 +615,19 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) /* device.c */ TEST_FAIL_NTH(basic_fail_nth, device) { + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; struct iommu_test_hw_info info; + uint32_t fault_id, fault_fd; + uint32_t fault_hwpt_id; uint32_t ioas_id; uint32_t ioas_id2; uint32_t stdev_id; uint32_t idev_id; uint32_t hwpt_id; + uint32_t viommu_id; + uint32_t vdev_id; __u64 iova; self->fd = open("/dev/iommu", O_RDWR); @@ -624,6 +670,28 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) return -1; + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id, + IOMMU_HWPT_DATA_NONE, 0, 0)) + return -1; + + if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id)) + return -1; + + if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) + return -1; + + if (_test_ioctl_fault_alloc(self->fd, &fault_id, &fault_fd)) + return -1; + close(fault_fd); + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &fault_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data))) + return -1; + return 0; } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 40f6f14ce136..d979f5b0efe8 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -22,6 +22,12 @@ #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) +enum { + IOPT_PAGES_ACCOUNT_NONE = 0, + IOPT_PAGES_ACCOUNT_USER = 1, + IOPT_PAGES_ACCOUNT_MM = 2, +}; + #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) static inline void set_bit(unsigned int nr, unsigned long *addr) @@ -40,12 +46,28 @@ static inline bool test_bit(unsigned int nr, unsigned long *addr) static void *buffer; static unsigned long BUFFER_SIZE; +static void *mfd_buffer; +static int mfd; + static unsigned long PAGE_SIZE; #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) +{ + int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; + int mfd = memfd_create("buffer", mfd_flags); + + if (mfd <= 0) + return MAP_FAILED; + if (ftruncate(mfd, length)) + return MAP_FAILED; + *mfd_p = mfd; + return mmap(0, length, prot, flags, mfd, 0); +} + /* * Have the kernel check the refcount on pages. I don't know why a freshly * mmap'd anon non-compound page starts out with a ref of 3 @@ -234,6 +256,30 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_i test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \ }) +#define test_cmd_dev_check_cache(device_id, cache_id, expected) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_DEV_CHECK_CACHE, \ + .id = device_id, \ + .check_dev_cache = { \ + .id = cache_id, \ + .cache = expected, \ + }, \ + }; \ + ASSERT_EQ(0, ioctl(self->fd, \ + _IOMMU_TEST_CMD( \ + IOMMU_TEST_OP_DEV_CHECK_CACHE), \ + &test_cmd)); \ + }) + +#define test_cmd_dev_check_cache_all(device_id, expected) \ + ({ \ + int c; \ + for (c = 0; c < MOCK_DEV_CACHE_NUM; c++) \ + test_cmd_dev_check_cache(device_id, c, expected); \ + }) + static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, uint32_t data_type, uint32_t lreq, uint32_t *nreqs) @@ -265,6 +311,38 @@ static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, data_type, lreq, nreqs)); \ }) +static int _test_cmd_viommu_invalidate(int fd, __u32 viommu_id, void *reqs, + uint32_t data_type, uint32_t lreq, + uint32_t *nreqs) +{ + struct iommu_hwpt_invalidate cmd = { + .size = sizeof(cmd), + .hwpt_id = viommu_id, + .data_type = data_type, + .data_uptr = (uint64_t)reqs, + .entry_len = lreq, + .entry_num = *nreqs, + }; + int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd); + *nreqs = cmd.entry_num; + return rc; +} + +#define test_cmd_viommu_invalidate(viommu, reqs, lreq, nreqs) \ + ({ \ + ASSERT_EQ(0, \ + _test_cmd_viommu_invalidate(self->fd, viommu, reqs, \ + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, \ + lreq, nreqs)); \ + }) +#define test_err_viommu_invalidate(_errno, viommu_id, reqs, data_type, lreq, \ + nreqs) \ + ({ \ + EXPECT_ERRNO(_errno, _test_cmd_viommu_invalidate( \ + self->fd, viommu_id, reqs, \ + data_type, lreq, nreqs)); \ + }) + static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) { @@ -589,6 +667,47 @@ static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova, EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \ iova, length, NULL)) +static int _test_ioctl_ioas_map_file(int fd, unsigned int ioas_id, int mfd, + size_t start, size_t length, __u64 *iova, + unsigned int flags) +{ + struct iommu_ioas_map_file cmd = { + .size = sizeof(cmd), + .flags = flags, + .ioas_id = ioas_id, + .fd = mfd, + .start = start, + .length = length, + }; + int ret; + + if (flags & IOMMU_IOAS_MAP_FIXED_IOVA) + cmd.iova = *iova; + + ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &cmd); + *iova = cmd.iova; + return ret; +} + +#define test_ioctl_ioas_map_file(mfd, start, length, iova_p) \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map_file( \ + self->fd, self->ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + +#define test_err_ioctl_ioas_map_file(_errno, mfd, start, length, iova_p) \ + EXPECT_ERRNO( \ + _errno, \ + _test_ioctl_ioas_map_file( \ + self->fd, self->ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_id_file(ioas_id, mfd, start, length, iova_p) \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map_file( \ + self->fd, ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) { struct iommu_test_cmd memlimit_cmd = { @@ -762,3 +881,58 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) #define test_cmd_trigger_iopf(device_id, fault_fd) \ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) + +static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, + __u32 type, __u32 flags, __u32 *viommu_id) +{ + struct iommu_viommu_alloc cmd = { + .size = sizeof(cmd), + .flags = flags, + .type = type, + .dev_id = device_id, + .hwpt_id = hwpt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VIOMMU_ALLOC, &cmd); + if (ret) + return ret; + if (viommu_id) + *viommu_id = cmd.out_viommu_id; + return 0; +} + +#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \ + ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ + type, 0, viommu_id)) +#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ + type, 0, viommu_id)) + +static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, + __u64 virt_id, __u32 *vdev_id) +{ + struct iommu_vdevice_alloc cmd = { + .size = sizeof(cmd), + .dev_id = idev_id, + .viommu_id = viommu_id, + .virt_id = virt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VDEVICE_ALLOC, &cmd); + if (ret) + return ret; + if (vdev_id) + *vdev_id = cmd.out_vdevice_id; + return 0; +} + +#define test_cmd_vdevice_alloc(viommu_id, idev_id, virt_id, vdev_id) \ + ASSERT_EQ(0, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ + virt_id, vdev_id)) +#define test_err_vdevice_alloc(_errno, viommu_id, idev_id, virt_id, vdev_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ + virt_id, vdev_id)) diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh index 79a125eb24c2..05a461890671 100644 --- a/tools/testing/selftests/kselftest/ktap_helpers.sh +++ b/tools/testing/selftests/kselftest/ktap_helpers.sh @@ -7,6 +7,7 @@ KTAP_TESTNO=1 KTAP_CNT_PASS=0 KTAP_CNT_FAIL=0 +KTAP_CNT_XFAIL=0 KTAP_CNT_SKIP=0 KSFT_PASS=0 @@ -69,6 +70,16 @@ ktap_test_skip() { KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1)) } +ktap_test_xfail() { + description="$1" + + result="ok" + directive="XFAIL" + __ktap_test "$result" "$description" "$directive" + + KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1)) +} + ktap_test_fail() { description="$1" @@ -99,7 +110,7 @@ ktap_exit_fail_msg() { ktap_finished() { ktap_print_totals - if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then + if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then exit "$KSFT_PASS" else exit "$KSFT_FAIL" @@ -107,5 +118,5 @@ ktap_finished() { } ktap_print_totals() { - echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0" + echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0" } diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48645a2e29da..41593d2e7de9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -55,6 +55,7 @@ LIBKVM_aarch64 += lib/aarch64/vgic.c LIBKVM_s390x += lib/s390x/diag318_test_handler.c LIBKVM_s390x += lib/s390x/processor.c LIBKVM_s390x += lib/s390x/ucall.c +LIBKVM_s390x += lib/s390x/facility.c LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c @@ -67,7 +68,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features +TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test @@ -156,6 +157,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs @@ -189,6 +191,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test +TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test TEST_GEN_PROGS_s390x += s390x/ucontrol_test TEST_GEN_PROGS_s390x += demand_paging_test diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 2582c49e525a..ff7a949fc96a 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -433,15 +433,15 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_BRK_INS, guest_sw_bp_handler); + ESR_ELx_EC_BRK64, guest_sw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_WP_CURRENT, guest_wp_handler); + ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SSTEP_CURRENT, guest_ss_handler); + ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SVC64, guest_svc_handler); + ESR_ELx_EC_SVC64, guest_svc_handler); /* Specify bpn/wpn/ctx_bpn to be tested */ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/aarch64/mmio_abort.c new file mode 100644 index 000000000000..8b7a80a51b1c --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/mmio_abort.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * mmio_abort - Tests for userspace MMIO abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +extern char test_mmio_abort_insn; + +static void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); +} diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c index 943d65fc6b0b..58304bbc2036 100644 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -150,7 +150,7 @@ static void test_guest_no_gicv3(void) vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_UNKNOWN, guest_undef_handler); + ESR_ELx_EC_UNKNOWN, guest_undef_handler); test_run_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index d29b08198b42..ec33a8f9c908 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -544,9 +544,9 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_DABT, no_dabt_handler); + ESR_ELx_EC_DABT_CUR, no_dabt_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_IABT, no_iabt_handler); + ESR_ELx_EC_IABT_CUR, no_iabt_handler); } static void setup_gva_maps(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index 61731a950def..eaa7655fefc1 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -54,6 +54,15 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) return res.a0; } +static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; @@ -188,11 +197,94 @@ static void host_test_system_suspend(void) kvm_vm_free(vm); } +static void guest_test_system_off2(void) +{ + uint64_t ret; + + /* assert that SYSTEM_OFF2 is discoverable */ + GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + + /* With non-zero 'cookie' field, it should fail */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + /* + * This would normally never return, so KVM sets the return value + * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so + * that it can test both values for HIBERNATE_OFF. + */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + /* + * Revision F.b of the PSCI v1.3 specification documents zero as an + * alias for HIBERNATE_OFF, since that's the value used in earlier + * revisions of the spec and some implementations in the field. + */ + ret = psci_system_off2(0, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + ret = psci_system_off2(0, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + GUEST_DONE(); +} + +static void host_test_system_off2(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_mp_state mps; + uint64_t psci_version = 0; + int nr_shutdowns = 0; + struct kvm_run *run; + struct ucall uc; + + setup_vm(guest_test_system_off2, &source, &target); + + vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version); + + TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), + "Unexpected PSCI version %lu.%lu", + PSCI_VERSION_MAJOR(psci_version), + PSCI_VERSION_MINOR(psci_version)); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); + TEST_ASSERT(run->system_event.ndata >= 1, + "Unexpected amount of system event data: %u (expected, >= 1)", + run->system_event.ndata); + TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, + "PSCI_OFF2 flag not set. Flags %llu (expected %llu)", + run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); + + nr_shutdowns++; + + /* Restart the vCPU */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_mp_state_set(source, &mps); + + enter_guest(source); + } + + TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); + TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); +} + int main(void) { TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); host_test_cpu_on(); host_test_system_suspend(); + host_test_system_off2(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index b87e53580bfc..3a97c160b5fe 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -152,7 +152,6 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), REG_FTR_END, }; @@ -443,6 +442,101 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) } } +#define MPAM_IDREG_TEST 6 +static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + int idx, err; + + /* + * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, + * check that if it can be set to 1, (i.e. it is supported by the + * hardware), that it can't be set to other values. + */ + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + /* Writeable? Nothing to test! */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); + if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + + /* Try to set MPAM=0. This should always be possible. */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); + + /* Try to set MPAM=1 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); + + /* Try to set MPAM=2 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); + + /* And again for ID_AA64PFR1_EL1.MPAM_frac */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val); + + /* Try to set MPAM_frac=0. This should always be possible. */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); + + /* Try to set MPAM_frac=1 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); + + /* Try to set MPAM_frac=2 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; @@ -581,12 +675,14 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2; + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + + MPAM_IDREG_TEST; ksft_set_plan(test_cnt); test_vm_ftr_id_regs(vcpu, aarch64_only); test_vcpu_ftr_id_regs(vcpu); + test_user_set_mpam_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index d31b9f64ba14..f9c0c86d7e85 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -300,7 +300,7 @@ static void guest_sync_handler(struct ex_regs *regs) uint64_t esr, ec; esr = read_sysreg(esr_el1); - ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(esr); __GUEST_ASSERT(expected_ec == ec, "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", @@ -338,10 +338,10 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) * Reading/writing the event count/type registers should cause * an UNDEFINED exception. */ - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); /* * The bit corresponding to the (unimplemented) counter in * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. @@ -425,7 +425,7 @@ static void create_vpmu_vm(void *guest_code) vpmu_vm.vm = vm_create(1); vm_init_descriptor_tables(vpmu_vm.vm); - for (ec = 0; ec < ESR_EC_NUM; ec++) { + for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, guest_sync_handler); } diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index bce73bcb973c..94bd6ed24cf3 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -20,7 +20,6 @@ #define SLEEPING_THREAD_NUM (1 << 4) #define FORK_NUM (1ULL << 9) #define DELAY_US_MAX 2000 -#define GUEST_CODE_PIO_PORT 4 sem_t *sem; diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index de977d131082..1e8d0d531fbd 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -12,6 +12,8 @@ #include <linux/stringify.h> #include <linux/types.h> +#include <asm/brk-imm.h> +#include <asm/esr.h> #include <asm/sysreg.h> @@ -100,19 +102,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -#define ESR_EC_NUM 64 -#define ESR_EC_SHIFT 26 -#define ESR_EC_MASK (ESR_EC_NUM - 1) - -#define ESR_EC_UNKNOWN 0x0 -#define ESR_EC_SVC64 0x15 -#define ESR_EC_IABT 0x21 -#define ESR_EC_DABT 0x25 -#define ESR_EC_HW_BP_CURRENT 0x31 -#define ESR_EC_SSTEP_CURRENT 0x33 -#define ESR_EC_WP_CURRENT 0x35 -#define ESR_EC_BRK_INS 0x3c - /* Access flag */ #define PTE_AF (1ULL << 10) diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390x/facility.h new file mode 100644 index 000000000000..00a1ced6538b --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/facility.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Get the facility bits with the STFLE instruction + */ + +#ifndef SELFTEST_KVM_FACILITY_H +#define SELFTEST_KVM_FACILITY_H + +#include <linux/bitops.h> + +/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ +#define NB_STFL_DOUBLEWORDS 32 + +extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern bool stfle_flag; + +static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +{ + register unsigned long r0 asm("0") = nb_doublewords - 1; + + asm volatile(" .insn s,0xb2b00000,0(%1)\n" + : "+d" (r0) + : "a" (fac) + : "memory", "cc"); +} + +static inline void setup_facilities(void) +{ + stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); + stfle_flag = true; +} + +static inline bool test_facility(int nr) +{ + if (!stfle_flag) + setup_facilities(); + return test_bit_inv(nr, stfl_doublewords); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index 481bd2fd6a32..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -32,4 +32,10 @@ static inline void cpu_relax(void) barrier(); } +/* Get the instruction length */ +static inline int insn_length(unsigned char code) +{ + return ((((int)code + 64) >> 7) + 1) << 1; +} + #endif diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e247f99e0473..645200e95f89 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1049,6 +1049,11 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); } +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, uint32_t value); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index fe4dc3693112..698e34f39241 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -450,7 +450,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) } struct handlers { - handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; + handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; }; void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) @@ -469,7 +469,7 @@ void route_exception(struct ex_regs *regs, int vector) switch (vector) { case VECTOR_SYNC_CURRENT: case VECTOR_SYNC_LOWER_64: - ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(read_sysreg(esr_el1)); valid_ec = true; break; case VECTOR_IRQ_CURRENT: @@ -508,7 +508,7 @@ void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, assert(VECTOR_IS_SYNC(vector)); assert(vector < VECTOR_NUM); - assert(ec < ESR_EC_NUM); + assert(ec <= ESR_ELx_EC_MAX); handlers->exception_handlers[vector][ec] = handler; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a2b7df5f1d39..480e3a40d197 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -720,9 +720,6 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); - region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); @@ -1197,7 +1194,12 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot)); + struct userspace_mem_region *region = memslot2region(vm, slot); + + region->region.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + __vm_mem_region_delete(vm, region); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390x/facility.c new file mode 100644 index 000000000000..d540812d911a --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/facility.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Contains the definition for the global variables to have the test facitlity feature. + */ + +#include "facility.h" + +uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 974bcd2df6d7..636b29ba8985 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -506,6 +506,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -519,6 +521,20 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { @@ -675,6 +691,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); @@ -686,6 +703,13 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) mp_state.mp_state = 0; vcpu_mp_state_set(vcpu, &mp_state); + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. + */ + vcpu_get_cpuid(vcpu); return vcpu; } diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8e34f7fa44e9..4bc1051848e5 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -41,10 +41,14 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMNPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSNPM: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADU: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: @@ -414,10 +418,14 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(I), KVM_ISA_EXT_ARR(M), KVM_ISA_EXT_ARR(V), + KVM_ISA_EXT_ARR(SMNPM), KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), @@ -946,9 +954,13 @@ KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); +KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); +KVM_ISA_EXT_SIMPLE_CONFIG(ssnpm, SSNPM); KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); +KVM_ISA_EXT_SIMPLE_CONFIG(svade, SVADE); +KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); @@ -1009,9 +1021,13 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_fp_f, &config_fp_d, &config_h, + &config_smnpm, &config_smstateen, &config_sscofpmf, + &config_ssnpm, &config_sstc, + &config_svade, + &config_svadu, &config_svinval, &config_svnapot, &config_svpbmt, diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c new file mode 100644 index 000000000000..27255880dabd --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those + * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction + * query data reported via the CPU Model, allowing us to directly compare it with the data + * acquired through executing the queries in the test. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include "facility.h" + +#include "kvm_util.h" + +#define PLO_FUNCTION_MAX 256 + +/* Query available CPU subfunctions */ +struct kvm_s390_vm_cpu_subfunc cpu_subfunc; + +static void get_cpu_machine_subfuntions(struct kvm_vm *vm, + struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) +{ + int r; + + r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); + + TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); +} + +static inline int plo_test_bit(unsigned char nr) +{ + unsigned long function = nr | 0x100; + int cc; + + asm volatile(" lgr 0,%[function]\n" + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : [function] "d" (function) + : "cc", "0"); + return cc == 0; +} + +/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ +static void test_plo_asm_block(u8 (*query)[32]) +{ + for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { + if (plo_test_bit(i)) + (*query)[i >> 3] |= 0x80 >> (i & 7); + } +} + +/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ +static void test_kmac_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb91e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ +static void test_kmc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92f0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ +static void test_km_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92e0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ +static void test_kimd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ +static void test_klmd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93f0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ +static void test_kmctr_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb92d0000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ +static void test_kmf_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92a0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ +static void test_kmo_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92b0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ +static void test_pcc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92c0000,0,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ +static void test_prno_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93c0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ +static void test_kma_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb9290000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ +static void test_kdsa_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93a0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ +static void test_sortl_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ +static void test_dfltcc_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* + * Testing Perform Function with Concurrent Results (PFCR) + * CPU subfunctions's ASM block + */ +static void test_pfcr_asm_block(u8 (*query)[16]) +{ + asm volatile(" lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]\n" + : [query] "=QS" (*query) + : + : "cc", "0"); +} + +typedef void (*testfunc_t)(u8 (*array)[]); + +struct testdef { + const char *subfunc_name; + u8 *subfunc_array; + size_t array_size; + testfunc_t test; + int facility_bit; +} testlist[] = { + /* + * PLO was introduced in the very first 64-bit machine generation. + * Hence it is assumed PLO is always installed in Z Arch. + */ + { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, + /* MSA - Facility bit 17 */ + { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, + { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, + { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, + { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, + { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, + /* MSA - Facility bit 77 */ + { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, + { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, + { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, + { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, + /* MSA5 - Facility bit 57 */ + { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, + /* MSA8 - Facility bit 146 */ + { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, + /* MSA9 - Facility bit 155 */ + { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, + /* SORTL - Facility bit 150 */ + { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, + /* DFLTCC - Facility bit 151 */ + { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, + /* Concurrent-function facility - Facility bit 201 */ + { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int idx; + + ksft_print_header(); + + vm = vm_create(1); + + memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); + get_cpu_machine_subfuntions(vm, &cpu_subfunc); + + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (test_facility(testlist[idx].facility_bit)) { + u8 *array = malloc(testlist[idx].array_size); + + testlist[idx].test((u8 (*)[testlist[idx].array_size])array); + + TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, + array, testlist[idx].array_size), 0); + + ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); + free(array); + } else { + ksft_test_result_skip("%s feature is not avaialable\n", + testlist[idx].subfunc_name); + } + } + + kvm_vm_free(vm); + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c index f257beec1430..135ee22856cf 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -16,7 +16,11 @@ #include <linux/capability.h> #include <linux/sizes.h> +#define PGM_SEGMENT_TRANSLATION 0x10 + #define VM_MEM_SIZE (4 * SZ_1M) +#define VM_MEM_EXT_SIZE (2 * SZ_1M) +#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) /* so directly declare capget to check caps without libcap */ int capget(cap_user_header_t header, cap_user_data_t data); @@ -58,6 +62,50 @@ asm("test_gprs_asm:\n" " j 0b\n" ); +/* Test program manipulating memory */ +extern char test_mem_asm[]; +asm("test_mem_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " xgr %r1,%r1\n" + " l %r1,0(%r5,%r6)\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +/* Test program manipulating storage keys */ +extern char test_skey_asm[]; +asm("test_skey_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " sske %r1,%r6\n" + " xgr %r1,%r1\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " rrbe %r1,%r6\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + FIXTURE(uc_kvm) { struct kvm_s390_sie_block *sie_block; @@ -67,6 +115,7 @@ FIXTURE(uc_kvm) uintptr_t base_hva; uintptr_t code_hva; int kvm_run_size; + vm_paddr_t pgd; void *vm_mem; int vcpu_fd; int kvm_fd; @@ -116,7 +165,7 @@ FIXTURE_SETUP(uc_kvm) self->base_gpa = 0; self->code_gpa = self->base_gpa + (3 * SZ_1M); - self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE); + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); self->base_hva = (uintptr_t)self->vm_mem; self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; @@ -161,10 +210,13 @@ TEST_F(uc_kvm, uc_attr_mem_limit) struct kvm_device_attr attr = { .group = KVM_S390_VM_MEM_CTRL, .attr = KVM_S390_VM_MEM_LIMIT_SIZE, - .addr = (unsigned long)&limit, + .addr = (u64)&limit, }; int rc; + rc = ioctl(self->vm_fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); EXPECT_EQ(0, rc); EXPECT_EQ(~0UL, limit); @@ -222,16 +274,112 @@ TEST(uc_cap_hpage) close(kvm_fd); } -/* verify SIEIC exit +/* calculate host virtual addr from guest physical addr */ +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +{ + return (void *)(self->base_hva - self->base_gpa + gpa); +} + +/* map / make additional memory available */ +static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); +} + +/* unmap previously mapped memory */ +static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas unmap %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); +} + +/* handle ucontrol exit by mapping the accessed segment */ +static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + u64 seg_addr; + int rc; + + TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + switch (run->s390_ucontrol.pgm_code) { + case PGM_SEGMENT_TRANSLATION: + seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); + pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", + run->s390_ucontrol.trans_exc_code, seg_addr); + /* map / make additional memory available */ + rc = uc_map_ext(self, seg_addr, SZ_1M); + TEST_ASSERT_EQ(0, rc); + break; + default: + TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); + } +} + +/* + * Handle the SIEIC exit * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued */ -static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) +static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* disable KSS */ + sie_block->cpuflags &= ~CPUSTAT_KSS; + /* disable skey inst interception */ + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + +/* + * Handle the instruction intercept + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + int ilen = insn_length(sie_block->ipa >> 8); + struct kvm_run *run = self->run; + + switch (run->s390_sieic.ipa) { + case 0xB229: /* ISKE */ + case 0xB22b: /* SSKE */ + case 0xB22a: /* RRBE */ + uc_skey_enable(self); + + /* rewind to reexecute intercepted instruction */ + run->psw_addr = run->psw_addr - ilen; + pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); + return true; + default: + return false; + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) { struct kvm_s390_sie_block *sie_block = self->sie_block; struct kvm_run *run = self->run; /* check SIE interception code */ - pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n", + pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -239,7 +387,10 @@ static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) case ICPT_INST: /* end execution in caller on intercepted instruction */ pr_info("sie instruction interception\n"); - return false; + return uc_handle_insn_ic(self); + case ICPT_KSS: + uc_skey_enable(self); + return true; case ICPT_OPEREXC: /* operation exception */ TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); @@ -250,11 +401,17 @@ static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) } /* verify VM state on exit */ -static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) { struct kvm_run *run = self->run; switch (run->exit_reason) { + case KVM_EXIT_S390_UCONTROL: + /** check program interruption code + * handle page fault --> ucas map + */ + uc_handle_exit_ucontrol(self); + break; case KVM_EXIT_S390_SIEIC: return uc_handle_sieic(self); default: @@ -264,7 +421,7 @@ static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) } /* run the VM until interrupted */ -static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) +static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) { int rc; @@ -275,7 +432,7 @@ static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) return rc; } -static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) { struct kvm_s390_sie_block *sie_block = self->sie_block; @@ -286,6 +443,89 @@ static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) TEST_ASSERT_EQ(0x440000, sie_block->ipb); } +TEST_F(uc_kvm, uc_no_user_region) +{ + struct kvm_userspace_memory_region region = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + struct kvm_userspace_memory_region2 region2 = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); + ASSERT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_map_unmap) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + const u64 disp = 1; + int rc; + + /* copy test_mem_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* set register content for test_mem_asm to access not mapped memory*/ + sync_regs->gprs[1] = 0x55; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = VM_MEM_SIZE + disp; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* run and expect to fail with ucontrol pic segment translation */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); + + /* fail to map memory with not segment aligned address */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); + ASSERT_GT(0, rc) + TH_LOG("ucas map for non segment address should fail but didn't; " + "result %d not expected, %s", rc, strerror(errno)); + + /* map / make additional memory available */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* assert registers and memory are in expected state */ + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(0x55, sync_regs->gprs[1]); + ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); + + /* unmap and run loop again */ + rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + /* handle ucontrol exit and remap memory after previous map and unmap */ + ASSERT_EQ(true, uc_handle_exit(self)); +} + TEST_F(uc_kvm, uc_gprs) { struct kvm_sync_regs *sync_regs = &self->run->s.regs; @@ -329,4 +569,240 @@ TEST_F(uc_kvm, uc_gprs) ASSERT_EQ(1, sync_regs->gprs[0]); } +TEST_F(uc_kvm, uc_skey) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + struct kvm_run *run = self->run; + const u8 skeyvalue = 0x34; + + /* copy test_skey_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE); + + /* set register content for test_skey_asm to access not mapped memory */ + sync_regs->gprs[1] = skeyvalue; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = test_vaddr; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(true, uc_handle_exit(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + + /* ISKE */ + ASSERT_EQ(0, uc_run_once(self)); + + /* + * Bail out and skip the test after uc_skey_enable was executed but iske + * is still intercepted. Instructions are not handled by the kernel. + * Thus there is no need to test this here. + */ + TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); + TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_REQUIRE(sie_block->ipa != 0xb229); + + /* ISKE contd. */ + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(2, sync_regs->gprs[0]); + /* assert initial skey (ACC = 0, R & C = 1) */ + ASSERT_EQ(0x06, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* RRBE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(4, sync_regs->gprs[0]); + /* assert R reset but rest of skey unchanged */ + ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); + ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); + uc_assert_diag44(self); +} + +static char uc_flic_b[PAGE_SIZE]; +static struct kvm_s390_io_adapter uc_flic_ioa = { .id = 0 }; +static struct kvm_s390_io_adapter_req uc_flic_ioam = { .id = 0 }; +static struct kvm_s390_ais_req uc_flic_asim = { .isc = 0 }; +static struct kvm_s390_ais_all uc_flic_asima = { .simm = 0 }; +static struct uc_flic_attr_test { + char *name; + struct kvm_device_attr a; + int hasrc; + int geterrno; + int seterrno; +} uc_flic_attr_tests[] = { + { + .name = "KVM_DEV_FLIC_GET_ALL_IRQS", + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_GET_ALL_IRQS, + .addr = (u64)&uc_flic_b, + .attr = PAGE_SIZE, + }, + }, + { + .name = "KVM_DEV_FLIC_ENQUEUE", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_ENQUEUE, }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IRQS", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_CLEAR_IRQS, }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_REGISTER", + .geterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_REGISTER, + .addr = (u64)&uc_flic_ioa, + }, + }, + { + .name = "KVM_DEV_FLIC_ADAPTER_MODIFY", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_ADAPTER_MODIFY, + .addr = (u64)&uc_flic_ioam, + .attr = sizeof(uc_flic_ioam), + }, + }, + { + .name = "KVM_DEV_FLIC_CLEAR_IO_IRQ", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { + .group = KVM_DEV_FLIC_CLEAR_IO_IRQ, + .attr = 32, + }, + }, + { + .name = "KVM_DEV_FLIC_AISM", + .geterrno = EINVAL, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM, + .addr = (u64)&uc_flic_asim, + }, + }, + { + .name = "KVM_DEV_FLIC_AIRQ_INJECT", + .geterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_AIRQ_INJECT, }, + }, + { + .name = "KVM_DEV_FLIC_AISM_ALL", + .geterrno = ENOTSUP, + .seterrno = ENOTSUP, + .a = { + .group = KVM_DEV_FLIC_AISM_ALL, + .addr = (u64)&uc_flic_asima, + .attr = sizeof(uc_flic_asima), + }, + }, + { + .name = "KVM_DEV_FLIC_APF_ENABLE", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_ENABLE, }, + }, + { + .name = "KVM_DEV_FLIC_APF_DISABLE_WAIT", + .geterrno = EINVAL, + .seterrno = EINVAL, + .a = { .group = KVM_DEV_FLIC_APF_DISABLE_WAIT, }, + }, +}; + +TEST_F(uc_kvm, uc_flic_attrs) +{ + struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC }; + struct kvm_device_attr attr; + u64 value; + int rc, i; + + rc = ioctl(self->vm_fd, KVM_CREATE_DEVICE, &cd); + ASSERT_EQ(0, rc) TH_LOG("create device failed with err %s (%i)", + strerror(errno), errno); + + for (i = 0; i < ARRAY_SIZE(uc_flic_attr_tests); i++) { + TH_LOG("test %s", uc_flic_attr_tests[i].name); + attr = (struct kvm_device_attr) { + .group = uc_flic_attr_tests[i].a.group, + .attr = uc_flic_attr_tests[i].a.attr, + .addr = uc_flic_attr_tests[i].a.addr, + }; + if (attr.addr == 0) + attr.addr = (u64)&value; + + rc = ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr); + EXPECT_EQ(uc_flic_attr_tests[i].hasrc, !!rc) + TH_LOG("expected dev attr missing %s", + uc_flic_attr_tests[i].name); + + rc = ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].geterrno, !!rc) + TH_LOG("get dev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].geterrno) + EXPECT_EQ(uc_flic_attr_tests[i].geterrno, errno) + TH_LOG("get dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + + rc = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(!!uc_flic_attr_tests[i].seterrno, !!rc) + TH_LOG("set sev attr rc not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + if (uc_flic_attr_tests[i].seterrno) + EXPECT_EQ(uc_flic_attr_tests[i].seterrno, errno) + TH_LOG("set dev attr errno not expected on %s %s (%i)", + uc_flic_attr_tests[i].name, + strerror(errno), errno); + } + + close(cd.fd); +} + +TEST_F(uc_kvm, uc_set_gsi_routing) +{ + struct kvm_irq_routing *routing = kvm_gsi_routing_create(); + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + .u.adapter = (struct kvm_irq_routing_s390_adapter) { + .ind_addr = 0, + }, + }; + int rc; + + routing->entries[0] = ue; + routing->nr = 1; + rc = ioctl(self->vm_fd, KVM_SET_GSI_ROUTING, routing); + ASSERT_EQ(-1, rc) TH_LOG("err %s (%i)", strerror(errno), errno); + ASSERT_EQ(EINVAL, errno) TH_LOG("err %s (%i)", strerror(errno), errno); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 903940c54d2d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -86,6 +86,8 @@ static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) static void check_xtile_info(void) { + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); @@ -122,29 +124,12 @@ static void set_tilecfg(struct tile_config *cfg) } } -static void init_regs(void) -{ - uint64_t cr4, xcr0; - - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xcr0 = xgetbv(0); - xcr0 |= XFEATURE_MASK_XTILE; - xsetbv(0x0, xcr0); - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); -} - static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { - init_regs(); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); GUEST_SYNC(1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index fec03b11b059..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -12,17 +12,16 @@ #include "kvm_util.h" #include "processor.h" -/* CPUIDs known to differ */ -struct { - u32 function; - u32 index; -} mangled_cpuids[] = { - /* - * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR, - * which are not controlled for by this test. - */ - {.function = 0xd, .index = 0}, - {.function = 0xd, .index = 1}, +struct cpuid_mask { + union { + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; + u32 regs[4]; + }; }; static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) @@ -56,17 +55,29 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_DONE(); } -static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie) +static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) { - int i; - - for (i = 0; i < ARRAY_SIZE(mangled_cpuids); i++) { - if (mangled_cpuids[i].function == entrie->function && - mangled_cpuids[i].index == entrie->index) - return true; + struct cpuid_mask mask; + + memset(&mask, 0xff, sizeof(mask)); + + switch (entry->function) { + case 0x1: + mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); + break; + case 0x7: + mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); + break; + case 0xd: + /* + * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current + * XCR0 and IA32_XSS MSR values. + */ + if (entry->index < 2) + mask.ebx = 0; + break; } - - return false; + return mask; } static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, @@ -79,6 +90,8 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); for (i = 0; i < cpuid1->nent; i++) { + struct cpuid_mask mask; + e1 = &cpuid1->entries[i]; e2 = &cpuid2->entries[i]; @@ -88,15 +101,19 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); - if (is_cpuid_mangled(e1)) - continue; + /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */ + mask = get_const_cpuid_mask(e1); - TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx && - e1->ecx == e2->ecx && e1->edx == e2->edx, + TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && + (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && + (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && + (e1->edx & mask.edx) == (e2->edx & mask.edx), "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", e1->function, e1->index, - e1->eax, e1->ebx, e1->ecx, e1->edx, - e2->eax, e2->ebx, e2->ecx, e2->edx); + e1->eax & mask.eax, e1->ebx & mask.ebx, + e1->ecx & mask.ecx, e1->edx & mask.edx, + e2->eax & mask.eax, e2->ebx & mask.ebx, + e2->ecx & mask.ecx, e2->edx & mask.edx); } } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 624dc725e14d..28cc66454601 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -19,30 +19,42 @@ #include "kvm_util.h" #include "processor.h" -static inline bool cr4_cpuid_is_sync(void) -{ - uint64_t cr4 = get_cr4(); - - return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE)); -} +#define MAGIC_HYPERCALL_PORT 0x80 static void guest_code(void) { - uint64_t cr4; + u32 regs[4] = { + [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, + [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, + }; - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); + /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ + GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - /* notify hypervisor to change CR4 */ - GUEST_SYNC(0); - - /* check again */ - GUEST_ASSERT(cr4_cpuid_is_sync()); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + /* + * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output, + * and then restore CR4. Do this all in assembly to ensure no AVX + * instructions are executed while OSXSAVE=0. + */ + asm volatile ( + "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" + "cpuid\n\t" + "mov %%rdi, %%cr4\n\t" + : "+a" (regs[KVM_CPUID_EAX]), + "=b" (regs[KVM_CPUID_EBX]), + "+c" (regs[KVM_CPUID_ECX]), + "=d" (regs[KVM_CPUID_EDX]) + : "D" (get_cr4()) + ); + + /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ + GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); + + /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); GUEST_DONE(); } @@ -62,13 +74,16 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: + if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && + vcpu->run->io.direction == KVM_EXIT_IO_OUT) { /* emulate hypervisor clearing CR4.OSXSAVE */ vcpu_sregs_get(vcpu, &sregs); sregs.cr4 &= ~X86_CR4_OSXSAVE; vcpu_sregs_set(vcpu, &sregs); - break; + continue; + } + + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: REPORT_GUEST_ASSERT(uc); break; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 76cc2df9238a..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -166,7 +166,7 @@ int main(void) /* Test single step */ target_rip = CAST_TO_RIP(ss_start); target_dr6 = 0xffff4ff0ULL; - for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { + for (i = 0; i < ARRAY_SIZE(ss_size); i++) { target_rip += ss_size[i]; memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. + */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? "" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c deleted file mode 100644 index d09b3cbcadc6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_GET_MSR_INDEX_LIST and - * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - kvm_get_feature_msr(feature_list->indices[i]); -} diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index eda88080c186..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,8 +72,6 @@ int main(int argc, char *argv[]) } done: - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); - kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 2e9197eb1652..ae77698e6e97 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -41,8 +41,8 @@ static void guest_sev_code(void) /* Stash state passed via VMSA before any compiled code runs. */ extern void guest_code_xsave(void); asm("guest_code_xsave:\n" - "mov $-1, %eax\n" - "mov $-1, %edx\n" + "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" + "xor %edx, %edx\n" "xsave (%rdi)\n" "jmp guest_sev_es_code"); @@ -70,12 +70,6 @@ static void test_sync_vmsa(uint32_t policy) double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - struct kvm_sregs sregs; - struct kvm_xcrs xcrs = { - .nr_xcrs = 1, - .xcrs[0].xcr = 0, - .xcrs[0].value = XFEATURE_MASK_X87_AVX, - }; vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, @@ -84,11 +78,6 @@ static void test_sync_vmsa(uint32_t policy) vcpu_args_set(vcpu, 1, gva); - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - - vcpu_xcrs_set(vcpu, &xcrs); asm("fninit\n" "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" "fldl %3\n" @@ -192,6 +181,8 @@ static void test_sev_es_shutdown(void) int main(int argc, char *argv[]) { + const u64 xf_mask = XFEATURE_MASK_X87_AVX; + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); test_sev(guest_sev_code, SEV_POLICY_NO_DBG); @@ -204,7 +195,7 @@ int main(int argc, char *argv[]) test_sev_es_shutdown(); if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { test_sync_vmsa(0); test_sync_vmsa(SEV_POLICY_NO_DBG); } diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 1c756db329e5..141b7fc0c965 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -145,11 +145,6 @@ static void __attribute__((__flatten__)) guest_code(void *arg) memset(buffer, 0xcc, sizeof(buffer)); - set_cr4(get_cr4() | X86_CR4_OSXSAVE); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xsetbv(0, xgetbv(0) | supported_xcr0); - /* * Modify state for all supported xfeatures to take them out of * their "init" state, i.e. to make them show up in XSTATE_BV. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 7c92536551cc..a1f5ff45d518 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -207,6 +207,29 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); } +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); + + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_is_pmu_enabled()); diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 95ce192d0753..c8a5c5e51661 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -48,16 +48,16 @@ do { \ static void guest_code(void) { - uint64_t xcr0_reset; + uint64_t initial_xcr0; uint64_t supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); - xcr0_reset = xgetbv(0); + initial_xcr0 = xgetbv(0); supported_xcr0 = this_cpu_supported_xcr0(); - GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP); + GUEST_ASSERT(initial_xcr0 == supported_xcr0); /* Check AVX */ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, @@ -79,6 +79,11 @@ static void guest_code(void) ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, XFEATURE_MASK_XTILE); + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got vector '0x%x'", + vector); + vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, "Expected success on XSETBV(0x%lx), got vector '0x%x'", diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index 37bbc3fb2780..2a03deb26a12 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -259,7 +259,7 @@ $MOD_TARGET: ${MOD_TARGET}_init % insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19 livepatch: enabling patch '$MOD_LIVEPATCH' livepatch: '$MOD_LIVEPATCH': initializing patching transition -test_klp_callbacks_demo: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: pre_patch_callback: vmlinux livepatch: pre-patch callback failed for object 'vmlinux' livepatch: failed to enable patch '$MOD_LIVEPATCH' livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 2c91428d2997..58fe1d96997c 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -5,6 +5,8 @@ . $(dirname $0)/functions.sh MOD_LIVEPATCH=test_klp_livepatch +MOD_LIVEPATCH2=test_klp_callbacks_demo +MOD_LIVEPATCH3=test_klp_syscall setup_config @@ -19,6 +21,8 @@ check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--" check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1" check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------" check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--" check_sysfs_value "$MOD_LIVEPATCH" "transition" "0" check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--" @@ -131,4 +135,71 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition livepatch: '$MOD_LIVEPATCH': unpatching complete % rmmod $MOD_LIVEPATCH" +start_test "sysfs test stack_order value" + +load_lp $MOD_LIVEPATCH + +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" + +load_lp $MOD_LIVEPATCH2 + +check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2" + +load_lp $MOD_LIVEPATCH3 + +check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3" + +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 + +check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1" +check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2" + +disable_lp $MOD_LIVEPATCH3 +unload_lp $MOD_LIVEPATCH3 + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% insmod test_modules/$MOD_LIVEPATCH2.ko +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': patching complete +% insmod test_modules/$MOD_LIVEPATCH3.ko +livepatch: enabling patch '$MOD_LIVEPATCH3' +livepatch: '$MOD_LIVEPATCH3': initializing patching transition +livepatch: '$MOD_LIVEPATCH3': starting patching transition +livepatch: '$MOD_LIVEPATCH3': completing patching transition +livepatch: '$MOD_LIVEPATCH3': patching complete +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2 +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH3/enabled +livepatch: '$MOD_LIVEPATCH3': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH3': starting unpatching transition +livepatch: '$MOD_LIVEPATCH3': completing unpatching transition +livepatch: '$MOD_LIVEPATCH3': unpatching complete +% rmmod $MOD_LIVEPATCH3 +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + exit 0 diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c index 66dec47e3ca3..732e89fe99c0 100644 --- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c +++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c @@ -56,16 +56,15 @@ TEST(flags_zero_lsm_set_self_attr) TEST(flags_overset_lsm_set_self_attr) { const long page_size = sysconf(_SC_PAGESIZE); - char *ctx = calloc(page_size, 1); + struct lsm_ctx *ctx = calloc(page_size, 1); __u32 size = page_size; - struct lsm_ctx *tctx = (struct lsm_ctx *)ctx; ASSERT_NE(NULL, ctx); if (attr_lsm_count()) { - ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size, + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0)); } - ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx, + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx, size, 0)); free(ctx); diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 95af2d78fd31..c0c53451a16d 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -9,6 +9,7 @@ #include <fcntl.h> #include <linux/memfd.h> #include <sched.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <signal.h> @@ -281,6 +282,24 @@ static void *mfd_assert_mmap_shared(int fd) return p; } +static void *mfd_assert_mmap_read_shared(int fd) +{ + void *p; + + p = mmap(NULL, + mfd_def_size, + PROT_READ, + MAP_SHARED, + fd, + 0); + if (p == MAP_FAILED) { + printf("mmap() failed: %m\n"); + abort(); + } + + return p; +} + static void *mfd_assert_mmap_private(int fd) { void *p; @@ -979,6 +998,30 @@ static void test_seal_future_write(void) close(fd); } +static void test_seal_write_map_read_shared(void) +{ + int fd; + void *p; + + printf("%s SEAL-WRITE-MAP-READ\n", memfd_str); + + fd = mfd_assert_new("kern_memfd_seal_write_map_read", + mfd_def_size, + MFD_CLOEXEC | MFD_ALLOW_SEALING); + + mfd_assert_add_seals(fd, F_SEAL_WRITE); + mfd_assert_has_seals(fd, F_SEAL_WRITE); + + p = mfd_assert_mmap_read_shared(fd); + + mfd_assert_read(fd); + mfd_assert_read_shared(fd); + mfd_fail_write(fd); + + munmap(p, mfd_def_size); + close(fd); +} + /* * Test SEAL_SHRINK * Test whether SEAL_SHRINK actually prevents shrinking @@ -1557,6 +1600,11 @@ static void test_share_fork(char *banner, char *b_suffix) close(fd); } +static bool pid_ns_supported(void) +{ + return access("/proc/self/ns/pid", F_OK) == 0; +} + int main(int argc, char **argv) { pid_t pid; @@ -1587,12 +1635,17 @@ int main(int argc, char **argv) test_seal_write(); test_seal_future_write(); + test_seal_write_map_read_shared(); test_seal_shrink(); test_seal_grow(); test_seal_resize(); - test_sysctl_simple(); - test_sysctl_nested(); + if (pid_ns_supported()) { + test_sysctl_simple(); + test_sysctl_nested(); + } else { + printf("PID namespaces are not supported; skipping sysctl tests\n"); + } test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index da030b43e43b..8f01f4da1c0d 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -51,3 +51,7 @@ hugetlb_madv_vs_map mseal_test seal_elf droppable +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 +guard-pages diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 321e63955272..3de23ea4663f 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -90,6 +90,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable +TEST_GEN_FILES += guard-pages ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -126,7 +127,9 @@ endif ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch +ifneq ($(ARCH),riscv64) TEST_GEN_FILES += virtual_address_range +endif TEST_GEN_FILES += write_to_hugetlbfs endif diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 32c6ccc2a6be..1238e1c5aae1 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -758,7 +758,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout) } /* Populate a base page. */ - memset(mem, 0, pagesize); + memset(mem, 1, pagesize); if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); @@ -824,12 +824,12 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) * Try to populate a THP. Touch the first sub-page and test if * we get the last sub-page populated automatically. */ - mem[0] = 0; + mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } - memset(mem, 0, thpsize); + memset(mem, 1, thpsize); size = thpsize; switch (thp_run) { @@ -1012,7 +1012,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) } /* Populate an huge page. */ - memset(mem, 0, hugetlbsize); + memset(mem, 1, hugetlbsize); /* * We need a total of two hugetlb pages to handle COW/unsharing diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c new file mode 100644 index 000000000000..7cdf815d0d63 --- /dev/null +++ b/tools/testing/selftests/mm/guard-pages.c @@ -0,0 +1,1243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <asm-generic/mman.h> /* Force the import of the tools version. */ +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <unistd.h> + +/* + * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: + * + * "If the signal occurs other than as the result of calling the abort or raise + * function, the behavior is undefined if the signal handler refers to any + * object with static storage duration other than by assigning a value to an + * object declared as volatile sig_atomic_t" + */ +static volatile sig_atomic_t signal_jump_set; +static sigjmp_buf signal_jmp_buf; + +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + +static int userfaultfd(int flags) +{ + return syscall(SYS_userfaultfd, flags); +} + +static void handle_fatal(int c) +{ + if (!signal_jump_set) + return; + + siglongjmp(signal_jmp_buf, c); +} + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(SYS_pidfd_open, pid, flags); +} + +/* + * Enable our signal catcher and try to read/write the specified buffer. The + * return value indicates whether the read/write succeeds without a fatal + * signal. + */ +static bool try_access_buf(char *ptr, bool write) +{ + bool failed; + + /* Tell signal handler to jump back here on fatal signal. */ + signal_jump_set = true; + /* If a fatal signal arose, we will jump back here and failed is set. */ + failed = sigsetjmp(signal_jmp_buf, 0) != 0; + + if (!failed) { + if (write) + *ptr = 'x'; + else + FORCE_READ(ptr); + } + + signal_jump_set = false; + return !failed; +} + +/* Try and read from a buffer, return true if no fatal signal. */ +static bool try_read_buf(char *ptr) +{ + return try_access_buf(ptr, false); +} + +/* Try and write to a buffer, return true if no fatal signal. */ +static bool try_write_buf(char *ptr) +{ + return try_access_buf(ptr, true); +} + +/* + * Try and BOTH read from AND write to a buffer, return true if BOTH operations + * succeed. + */ +static bool try_read_write_buf(char *ptr) +{ + return try_read_buf(ptr) && try_write_buf(ptr); +} + +FIXTURE(guard_pages) +{ + unsigned long page_size; +}; + +FIXTURE_SETUP(guard_pages) +{ + struct sigaction act = { + .sa_handler = &handle_fatal, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + if (sigaction(SIGSEGV, &act, NULL)) + ksft_exit_fail_perror("sigaction"); + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); +}; + +FIXTURE_TEARDOWN(guard_pages) +{ + struct sigaction act = { + .sa_handler = SIG_DFL, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + sigaction(SIGSEGV, &act, NULL); +} + +TEST_F(guard_pages, basic) +{ + const unsigned long NUM_PAGES = 10; + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Trivially assert we can touch the first page. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + + /* Establish that 1st page SIGSEGV's. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* Ensure we can touch everything else.*/ + for (i = 1; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Establish a guard page at the end of the mapping. */ + ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_INSTALL), 0); + + /* Check that both guard pages result in SIGSEGV. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* Remove the first guard page. */ + ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + /* Remove the last guard page. */ + ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* + * Test setting a _range_ of pages, namely the first 3. The first of + * these be faulted in, so this also tests that we can install guard + * pages over backed pages. + */ + ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure they are all guard pages. */ + for (i = 0; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Make sure the rest are not. */ + for (i = 3; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Remove guard pages. */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure we can touch everything. */ + for (i = 0; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * Now remove all guard pages, make sure we don't remove existing + * entries. + */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < NUM_PAGES * page_size; i += page_size) { + char chr = ptr[i]; + + ASSERT_EQ(chr, 'x'); + } + + ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0); +} + +/* Assert that operations applied across multiple VMAs work as expected. */ +TEST_F(guard_pages, multi_vma) +{ + const unsigned long page_size = self->page_size; + char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; + int i; + + /* Reserve a 100 page region over which we can install VMAs. */ + ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* Place a VMA of 10 pages size at the start of the region. */ + ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + /* Place a VMA of 5 pages size 50 pages into the region. */ + ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Place a VMA of 20 pages size at the end of the region. */ + ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Unmap gaps. */ + ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---] [---] [---] + */ + + /* + * Now mark the whole range as guard pages and make sure all VMAs are as + * such. + */ + + /* + * madvise() is certifiable and lets you perform operations over gaps, + * everything works, but it indicates an error and errno is set to + * -ENOMEM. Also if anything runs out of memory it is set to + * -ENOMEM. You are meant to guess which is which. + */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now remove guar pages over range and assert the opposite. */ + + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Now map incompatible VMAs in the gaps. */ + ptr = mmap(&ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr = mmap(&ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---][xxxx][---][xxxx][---] + * + * Where 'x' signifies VMAs that cannot be merged with those adjacent to + * them. + */ + + /* Multiple VMAs adjacent to one another should result in no error. */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0); +} + +/* + * Assert that batched operations performed using process_madvise() work as + * expected. + */ +TEST_F(guard_pages, process_madvise) +{ + const unsigned long page_size = self->page_size; + pid_t pid = getpid(); + int pidfd = pidfd_open(pid, 0); + char *ptr_region, *ptr1, *ptr2, *ptr3; + ssize_t count; + struct iovec vec[6]; + + ASSERT_NE(pidfd, -1); + + /* Reserve region to map over. */ + ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* + * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we + * overwrite existing entries and test this code path against + * overwriting existing entries. + */ + ptr1 = mmap(&ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0); + ASSERT_NE(ptr1, MAP_FAILED); + /* We want guard markers at start/end of each VMA. */ + vec[0].iov_base = ptr1; + vec[0].iov_len = page_size; + vec[1].iov_base = &ptr1[9 * page_size]; + vec[1].iov_len = page_size; + + /* 5 pages offset 50 pages into reserve region. */ + ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + vec[2].iov_base = ptr2; + vec[2].iov_len = page_size; + vec[3].iov_base = &ptr2[4 * page_size]; + vec[3].iov_len = page_size; + + /* 20 pages offset 79 pages into reserve region. */ + ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + vec[4].iov_base = ptr3; + vec[4].iov_len = page_size; + vec[5].iov_base = &ptr3[19 * page_size]; + vec[5].iov_len = page_size; + + /* Free surrounding VMAs. */ + ASSERT_EQ(munmap(ptr_region, page_size), 0); + ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0); + + /* Now guard in one step. */ + count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0); + + /* OK we don't have permission to do this, skip. */ + if (count == -1 && errno == EPERM) + ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + + /* Returns the number of bytes advised. */ + ASSERT_EQ(count, 6 * page_size); + + /* Now make sure the guarding was applied. */ + + ASSERT_FALSE(try_read_write_buf(ptr1)); + ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr2)); + ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr3)); + ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Now do the same with unguard... */ + count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0); + + /* ...and everything should now succeed. */ + + ASSERT_TRUE(try_read_write_buf(ptr1)); + ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr2)); + ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr3)); + ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr1, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr2, 5 * page_size), 0); + ASSERT_EQ(munmap(ptr3, 20 * page_size), 0); + close(pidfd); +} + +/* Assert that unmapping ranges does not leave guard markers behind. */ +TEST_F(guard_pages, munmap) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new1, *ptr_new2; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard first and last pages. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Assert that they are guarded. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size])); + + /* Unmap them. */ + ASSERT_EQ(munmap(ptr, page_size), 0); + ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); + + /* Map over them.*/ + ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new1, MAP_FAILED); + ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new2, MAP_FAILED); + + /* Assert that they are now not guarded. */ + ASSERT_TRUE(try_read_write_buf(ptr_new1)); + ASSERT_TRUE(try_read_write_buf(ptr_new2)); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mprotect() operations have no bearing on guard markers. */ +TEST_F(guard_pages, mprotect) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the middle of the range. */ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Assert that it is indeed guarded. */ + ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size])); + + /* Now make these pages read-only. */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0); + + /* Make sure the range is still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Make sure we can guard again without issue.*/ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Make sure the range is, yet again, still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Now unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is readable. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Split and merge VMAs and make sure guard pages still behave. */ +TEST_F(guard_pages, split_merge) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now unmap some pages in the range so we split. */ + ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0); + + /* Make sure the remaining ranges are guarded post-split. */ + for (i = 0; i < 2; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 2; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 6; i < 8; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 9; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now map them again - the unmap will have cleared the guards. */ + ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now make sure guard pages are established. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + bool expect_true = i == 2 || i == 5 || i == 8; + + ASSERT_TRUE(expect_true ? result : !result); + } + + /* Now guard everything again. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now split the range into three. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* Make sure the whole range is guarded for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_buf(curr)); + } + + /* Now reset protection bits so we merge the whole thing. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Make sure the whole range is still guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Split range into 3 again... */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* ...and unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is remedied for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Merge them again. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Now ensure the merged range is remedied for read/write. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that MADV_DONTNEED does not remove guard markers. */ +TEST_F(guard_pages, dontneed) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Back the whole range. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Guard every other page. */ + for (i = 0; i < 10; i += 2) { + char *curr = &ptr[i * page_size]; + int res = madvise(curr, page_size, MADV_GUARD_INSTALL); + + ASSERT_EQ(res, 0); + } + + /* Indicate that we don't need any of the range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0); + + /* Check to ensure guard markers are still in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_buf(curr); + + if (i % 2 == 0) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + /* Make sure we really did get reset to zero page. */ + ASSERT_EQ(*curr, '\0'); + } + + /* Now write... */ + result = try_write_buf(&ptr[i * page_size]); + + /* ...and make sure same result. */ + ASSERT_TRUE(i % 2 != 0 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mlock()'ed pages work correctly with guard markers. */ +TEST_F(guard_pages, mlock) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Lock. */ + ASSERT_EQ(mlock(ptr, 10 * page_size), 0); + + /* Now try to guard, should fail with EINVAL. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, EINVAL); + + /* OK unlock. */ + ASSERT_EQ(munlock(ptr, 10 * page_size), 0); + + /* Guard first half of range, should now succeed. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure guard works. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + if (i < 5) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + ASSERT_EQ(*curr, 'x'); + } + } + + /* + * Now lock the latter part of the range. We can't lock the guard pages, + * as this would result in the pages being populated and the guarding + * would cause this to error out. + */ + ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Now remove guard pages, we permit mlock()'d ranges to have guard + * pages removed as it is a non-destructive operation. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now check that no guard pages remain. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * - Moving a mapping alone should retain markers as they are. + */ +TEST_F(guard_pages, mremap_move) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 5 pages. */ + ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guard pages are in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Map a new region we will move this range into. Doing this ensures + * that we have reserved a range to map into. + */ + ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new); + + /* Make sure the guard markers are retained. */ + ASSERT_FALSE(try_read_write_buf(ptr_new)); + ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size])); + + /* + * Clean up - we only need reference the new pointer as we overwrote the + * PROT_NONE range and moved the existing one. + */ + munmap(ptr_new, 5 * page_size); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Expanding should retain guard pages, only now in different position. The user + * will have to remove guard pages manually to fix up (they'd have to do the + * same if it were a PROT_NONE mapping). + */ +TEST_F(guard_pages, mremap_expand) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 10 pages... */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* ...But unmap the last 5 so we can ensure we can expand into them. */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now expand to 10 pages. */ + ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Reserve a region which we can move to and expand into. */ + ptr_new = mmap(NULL, 20 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now move and expand into it. */ + ptr = mremap(ptr, 10 * page_size, 20 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new); + ASSERT_EQ(ptr, ptr_new); + + /* + * Again, make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* + * A real user would have to remove guard markers, but would reasonably + * expect all characteristics of the mapping to be retained, including + * guard markers. + */ + + /* Cleanup. */ + munmap(ptr, 20 * page_size); +} +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Shrinking will result in markers that are shrunk over being removed. Again, + * if the user were using a PROT_NONE mapping they'd have to manually fix this + * up also so this is OK. + */ +TEST_F(guard_pages, mremap_shrink) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 5 pages. */ + ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now shrink to 3 pages. */ + ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE); + ASSERT_NE(ptr, MAP_FAILED); + + /* We expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * As with expansion, a real user would have to remove guard pages and + * fixup. But you'd have to do similar manual things with PROT_NONE + * mappings too. + */ + + /* + * If we expand back to the original size, the end marker will, of + * course, no longer be present. + */ + ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Again, we expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + munmap(ptr, 5 * page_size); +} + +/* + * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set + * retain guard pages. + */ +TEST_F(guard_pages, fork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish guard apges in the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Assert that the guarding is in effect. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Now unguard the range.*/ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Child unguard does not impact parent page table state. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that forking a process with VMAs that do have VM_WIPEONFORK set + * behave as expected. + */ +TEST_F(guard_pages, fork_wipeonfork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark wipe on fork. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0); + + /* Guard the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Guard will have been wiped. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + exit(0); + } + + /* Parent process. */ + + waitpid(pid, NULL, 0); + + /* Guard markers should be in effect.*/ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_FREE retains guard entries as expected. */ +TEST_F(guard_pages, lazyfree) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensure guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Lazyfree range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0); + + /* This should leave the guard markers in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ +TEST_F(guard_pages, populate) +{ + const unsigned long page_size = self->page_size; + char *ptr; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Populate read should error out... */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1); + ASSERT_EQ(errno, EFAULT); + + /* ...as should populate write. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1); + ASSERT_EQ(errno, EFAULT); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ +TEST_F(guard_pages, cold_pageout) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensured guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now mark cold. This should have no impact on guard markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* OK, now page out. This should equally, have no effect on markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that guard pages do not break userfaultd. */ +TEST_F(guard_pages, uffd) +{ + const unsigned long page_size = self->page_size; + int uffd; + char *ptr; + int i; + struct uffdio_api api = { + .api = UFFD_API, + .features = 0, + }; + struct uffdio_register reg; + struct uffdio_range range; + + /* Set up uffd. */ + uffd = userfaultfd(0); + if (uffd == -1 && errno == EPERM) + ksft_exit_skip("No userfaultfd permissions, try running as root.\n"); + ASSERT_NE(uffd, -1); + + ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Register the range with uffd. */ + range.start = (unsigned long)ptr; + range.len = 10 * page_size; + reg.range = range; + reg.mode = UFFDIO_REGISTER_MODE_MISSING; + ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, ®), 0); + + /* Guard the range. This should not trigger the uffd. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* The guarding should behave as usual with no uffd intervention. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0); + close(uffd); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c index 432d5af15e66..db63abe5ee5e 100644 --- a/tools/testing/selftests/mm/hugetlb_dio.c +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -76,19 +76,15 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) /* Get the free huge pages after unmap*/ free_hpage_a = get_free_hugepages(); + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + /* * If the no. of free hugepages before allocation and after unmap does * not match - that means there could still be a page which is pinned. */ - if (free_hpage_a != free_hpage_b) { - ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); - ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); - ksft_test_result_fail(": Huge pages not freed!\n"); - } else { - ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); - ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); - ksft_test_result_pass(": Huge pages freed successfully !\n"); - } + ksft_test_result(free_hpage_a == free_hpage_b, + "free huge pages from %u-%u\n", start_off, end_off); } int main(void) diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c index 73b81c632366..e2640529dbb2 100644 --- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c +++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c @@ -5,20 +5,36 @@ #include <sys/mman.h> #include <sys/types.h> #include <unistd.h> +#include <setjmp.h> +#include <signal.h> #include "vm_util.h" #include "../kselftest.h" -#define MMAP_SIZE (1 << 21) #define INLOOP_ITER 100 -char *huge_ptr; +static char *huge_ptr; +static size_t huge_page_size; + +static sigjmp_buf sigbuf; +static bool sigbus_triggered; + +static void signal_handler(int signal) +{ + if (signal == SIGBUS) { + sigbus_triggered = true; + siglongjmp(sigbuf, 1); + } +} /* Touch the memory while it is being madvised() */ void *touch(void *unused) { char *ptr = (char *)huge_ptr; + if (sigsetjmp(sigbuf, 1)) + return NULL; + for (int i = 0; i < INLOOP_ITER; i++) ptr[0] = '.'; @@ -30,7 +46,7 @@ void *madv(void *unused) usleep(rand() % 10); for (int i = 0; i < INLOOP_ITER; i++) - madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + madvise(huge_ptr, huge_page_size, MADV_DONTNEED); return NULL; } @@ -44,9 +60,23 @@ int main(void) * interactions */ int max = 10000; + int err; + + ksft_print_header(); + ksft_set_plan(1); srand(getpid()); + if (signal(SIGBUS, signal_handler) == SIG_ERR) + ksft_exit_skip("Could not register signal handler."); + + huge_page_size = default_huge_page_size(); + if (!huge_page_size) + ksft_exit_skip("Could not detect default hugetlb page size."); + + ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n", + huge_page_size / 1024); + free_hugepages = get_free_hugepages(); if (free_hugepages != 1) { ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n", @@ -54,7 +84,7 @@ int main(void) } while (max--) { - huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); @@ -66,8 +96,14 @@ int main(void) pthread_join(thread1, NULL); pthread_join(thread2, NULL); - munmap(huge_ptr, MMAP_SIZE); + munmap(huge_ptr, huge_page_size); } - return KSFT_PASS; + ksft_test_result(!sigbus_triggered, "SIGBUS behavior\n"); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 2c5394584af4..2fc290d9430c 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -349,10 +349,12 @@ if [ $VADDR64 -ne 0 ]; then # allows high virtual address allocation requests independent # of platform's physical memory. - prev_policy=$(cat /proc/sys/vm/overcommit_memory) - echo 1 > /proc/sys/vm/overcommit_memory - CATEGORY="hugevm" run_test ./virtual_address_range - echo $prev_policy > /proc/sys/vm/overcommit_memory + if [ -x ./virtual_address_range ]; then + prev_policy=$(cat /proc/sys/vm/overcommit_memory) + echo 1 > /proc/sys/vm/overcommit_memory + CATEGORY="hugevm" run_test ./virtual_address_range + echo $prev_policy > /proc/sys/vm/overcommit_memory + fi # va high address boundary switch test ARCH_ARM64="arm64" diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 4e4c1e311247..2a2b69e91950 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -64,7 +64,7 @@ #define NR_CHUNKS_HIGH NR_CHUNKS_384TB #endif -static char *hind_addr(void) +static char *hint_addr(void) { int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); @@ -185,7 +185,7 @@ int main(int argc, char *argv[]) } for (i = 0; i < NR_CHUNKS_HIGH; i++) { - hint = hind_addr(); + hint = hint_addr(); hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git a/tools/testing/selftests/module/Makefile b/tools/testing/selftests/module/Makefile new file mode 100644 index 000000000000..6132d7ddb08b --- /dev/null +++ b/tools/testing/selftests/module/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile for module loading selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := find_symbol.sh + +include ../lib.mk + +# Nothing to clean up. +clean: diff --git a/tools/testing/selftests/module/config b/tools/testing/selftests/module/config new file mode 100644 index 000000000000..b0c206b1ad47 --- /dev/null +++ b/tools/testing/selftests/module/config @@ -0,0 +1,3 @@ +CONFIG_TEST_RUNTIME=y +CONFIG_TEST_RUNTIME_MODULE=y +CONFIG_TEST_KALLSYMS=m diff --git a/tools/testing/selftests/module/find_symbol.sh b/tools/testing/selftests/module/find_symbol.sh new file mode 100755 index 000000000000..2c56805c9b6e --- /dev/null +++ b/tools/testing/selftests/module/find_symbol.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +# Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> +# +# This is a stress test script for kallsyms through find_symbol() + +set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +test_reqs() +{ + if ! which modprobe 2> /dev/null > /dev/null; then + echo "$0: You need modprobe installed" >&2 + exit $ksft_skip + fi + + if ! which kmod 2> /dev/null > /dev/null; then + echo "$0: You need kmod installed" >&2 + exit $ksft_skip + fi + + if ! which perf 2> /dev/null > /dev/null; then + echo "$0: You need perf installed" >&2 + exit $ksft_skip + fi + + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo $msg must be run as root >&2 + exit $ksft_skip + fi +} + +load_mod() +{ + local STATS="-e duration_time" + STATS="$STATS -e user_time" + STATS="$STATS -e system_time" + STATS="$STATS -e page-faults" + local MOD=$1 + + local ARCH="$(uname -m)" + case "${ARCH}" in + x86_64) + perf stat $STATS $MODPROBE $MOD + ;; + *) + time $MODPROBE $MOD + exit 1 + ;; + esac +} + +remove_all() +{ + $MODPROBE -r test_kallsyms_b + for i in a b c d; do + $MODPROBE -r test_kallsyms_$i + done +} +test_reqs + +MODPROBE=$(</proc/sys/kernel/modprobe) + +remove_all +load_mod test_kallsyms_b +remove_all + +# Now pollute the namespace +$MODPROBE test_kallsyms_c +load_mod test_kallsyms_b + +# Now pollute the namespace with twice the number of symbols than the last time +remove_all +$MODPROBE test_kallsyms_c +$MODPROBE test_kallsyms_d +load_mod test_kallsyms_b + +exit 0 diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 68801e1a9ec2..70f65eb320a7 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1026,7 +1026,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) "size=100000,mode=700"), 0); ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, - "size=100000,mode=700"), 0); + "size=2m,mode=700"), 0); ASSERT_EQ(mkdir("/mnt/A", 0777), 0); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cb2fc601de66..73ee88d6b043 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -32,6 +32,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh +TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py @@ -95,6 +96,7 @@ TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh +TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 99b0e8c17fca..04c7ff577bb8 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -54,16 +54,16 @@ struct epoll_params { #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) #endif -static uint32_t cfg_port = 8000; +static uint16_t cfg_port = 8000; static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; static char *cfg_outfile; static int cfg_max_events = 8; -static int cfg_ifindex; +static uint32_t cfg_ifindex; /* busy poll params */ static uint32_t cfg_busy_poll_usecs; -static uint32_t cfg_busy_poll_budget; -static uint32_t cfg_prefer_busy_poll; +static uint16_t cfg_busy_poll_budget; +static uint8_t cfg_prefer_busy_poll; /* IRQ params */ static uint32_t cfg_defer_hard_irqs; @@ -79,6 +79,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + unsigned long long tmp; int ret; int c; @@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv) usage(argv[0]); while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + /* most options take integer values, except o and b, so reduce + * code duplication a bit for the common case by calling + * strtoull here and leave bounds checking and casting per + * option below. + */ + if (c != 'o' && c != 'b') + tmp = strtoull(optarg, NULL, 0); + switch (c) { case 'u': - cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_usecs == ULONG_MAX || - cfg_busy_poll_usecs > UINT32_MAX) + if (tmp == ULLONG_MAX || tmp > UINT32_MAX) error(1, ERANGE, "busy_poll_usecs too large"); + + cfg_busy_poll_usecs = (uint32_t)tmp; break; case 'P': - cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); - if (cfg_prefer_busy_poll == ULONG_MAX || - cfg_prefer_busy_poll > 1) + if (tmp == ULLONG_MAX || tmp > 1) error(1, ERANGE, "prefer busy poll should be 0 or 1"); + + cfg_prefer_busy_poll = (uint8_t)tmp; break; case 'g': - cfg_busy_poll_budget = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_budget == ULONG_MAX || - cfg_busy_poll_budget > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "busy poll budget must be [0, UINT16_MAX]"); + + cfg_busy_poll_budget = (uint16_t)tmp; break; case 'p': - cfg_port = strtoul(optarg, NULL, 0); - if (cfg_port > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "port must be <= 65535"); + + cfg_port = (uint16_t)tmp; break; case 'b': ret = inet_aton(optarg, &cfg_bind_addr); @@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv) error(1, 0, "outfile invalid"); break; case 'm': - cfg_max_events = strtol(optarg, NULL, 0); - - if (cfg_max_events == LONG_MIN || - cfg_max_events == LONG_MAX || - cfg_max_events <= 0) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "max events must be > 0 and < LONG_MAX"); + "max events must be > 0 and <= INT_MAX"); + + cfg_max_events = (int)tmp; break; case 'd': - cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); - - if (cfg_defer_hard_irqs == ULONG_MAX || - cfg_defer_hard_irqs > INT32_MAX) + if (tmp == ULLONG_MAX || tmp > INT32_MAX) error(1, ERANGE, "defer_hard_irqs must be <= INT32_MAX"); + + cfg_defer_hard_irqs = (uint32_t)tmp; break; case 'r': - cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); - - if (cfg_gro_flush_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, - "gro_flush_timeout must be < ULLONG_MAX"); + "gro_flush_timeout must be < UINT64_MAX"); + + cfg_gro_flush_timeout = (uint64_t)tmp; break; case 's': - cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); - - if (cfg_irq_suspend_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, "irq_suspend_timeout must be < ULLONG_MAX"); + + cfg_irq_suspend_timeout = (uint64_t)tmp; break; case 'i': - cfg_ifindex = strtoul(optarg, NULL, 0); - if (cfg_ifindex == ULONG_MAX) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "ifindex must be < ULONG_MAX"); + "ifindex must be <= INT_MAX"); + + cfg_ifindex = (int)tmp; break; } } @@ -215,7 +223,7 @@ static void setup_queue(void) struct netdev_napi_set_req *set_req = NULL; struct ynl_sock *ys; struct ynl_error yerr; - uint32_t napi_id; + uint32_t napi_id = 0; ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!ys) @@ -277,8 +285,8 @@ static void run_poller(void) * here */ epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; - epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; - epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.busy_poll_budget = cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; epoll_params.__pad = 0; val = 1; @@ -342,5 +350,9 @@ int main(int argc, char *argv[]) parse_opts(argc, argv); setup_queue(); run_poller(); + + if (cfg_outfile) + free(cfg_outfile); + return 0; } diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 876c2db02a63..bc314382e4e1 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -59,6 +59,7 @@ struct options { unsigned int proto; } sock; struct option_cmsg_u32 mark; + struct option_cmsg_u32 priority; struct { bool ena; unsigned int delay; @@ -97,6 +98,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-P val Set SO_PRIORITY via setsockopt\n" + "\t\t-Q val Set SO_PRIORITY via cmsg\n" "\t\t-d val Set SO_TXTIME with given delay (usec)\n" "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" @@ -115,7 +118,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -148,6 +151,10 @@ static void cs_parse_args(int argc, char *argv[]) opt.mark.ena = true; opt.mark.val = atoi(optarg); break; + case 'Q': + opt.priority.ena = true; + opt.priority.val = atoi(optarg); + break; case 'M': opt.sockopt.mark = atoi(optarg); break; @@ -253,6 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_MARK, &opt.mark); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_SOCKET, SO_PRIORITY, &opt.priority); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh new file mode 100755 index 000000000000..ee07d8653262 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_priority.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +readonly KSFT_SKIP=4 + +IP4=192.0.2.1/24 +TGT4=192.0.2.2 +TGT4_RAW=192.0.2.3 +IP6=2001:db8::1/64 +TGT6=2001:db8::2 +TGT6_RAW=2001:db8::3 +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +if ! command -v jq &> /dev/null; then + echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2 + exit "$KSFT_SKIP" +fi + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +create_filter() { + local handle=$1 + local vlan_prio=$2 + local ip_type=$3 + local proto=$4 + local dst_ip=$5 + local ip_proto + + if [[ "$proto" == "u" ]]; then + ip_proto="udp" + elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then + ip_proto="icmp" + elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then + ip_proto="icmpv6" + fi + + tc -n $NS filter add dev dummy1 \ + egress pref 1 handle "$handle" proto 802.1q \ + flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \ + dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \ + action pass +} + +ip -n $NS link set dev lo up +ip -n $NS link add name dummy1 up type dummy + +ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +ip -n $NS address add $IP4 dev dummy1.10 +ip -n $NS address add $IP6 dev dummy1.10 nodad + +ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647' + +ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 + +tc -n $NS qdisc add dev dummy1 clsact + +FILTER_COUNTER=10 + +for i in 4 6; do + for proto in u i r; do + echo "Test IPV$i, prot: $proto" + for priority in {0..7}; do + if [[ $i == 4 && $proto == "r" ]]; then + TGT=$TGT4_RAW + elif [[ $i == 6 && $proto == "r" ]]; then + TGT=$TGT6_RAW + elif [ $i == 4 ]; then + TGT=$TGT4 + else + TGT=$TGT6 + fi + + handle="${FILTER_COUNTER}${priority}" + + create_filter $handle $priority ipv$i $proto $TGT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + + if [[ $pkts == 0 ]]; then + check_result 0 + else + echo "prio $priority: expected 0, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -Q $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 1 ]]; then + check_result 0 + else + echo "prio $priority -Q: expected 1, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -P $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 2 ]]; then + check_result 0 + else + echo "prio $priority -P: expected 2, got $pkts" + check_result 1 + fi + done + FILTER_COUNTER=$((FILTER_COUNTER + 10)) + done +done + +if [ $FAILED_TESTS -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit 1 +else + echo "OK - All $TOTAL_TESTS tests passed" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index 1d7e756644bc..478af0aefa97 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -34,13 +34,28 @@ BAD=0 TOTAL=0 check_result() { + local ret=$1 + local got=$2 + local exp=$3 + local case=$4 + local xfail=$5 + local xf= + local inc= + + if [ "$xfail" == "xfail" ]; then + xf="(XFAIL)" + inc=0 + else + inc=1 + fi + ((TOTAL++)) - if [ $1 -ne 0 ]; then - echo " Case $4 returned $1, expected 0" - ((BAD++)) + if [ $ret -ne 0 ]; then + echo " Case $case returned $ret, expected 0 $xf" + ((BAD+=inc)) elif [ "$2" != "$3" ]; then - echo " Case $4 returned '$2', expected '$3'" - ((BAD++)) + echo " Case $case returned '$got', expected '$exp' $xf" + ((BAD+=inc)) fi } @@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do awk '/SND/ { if ($3 > 1000) print "OK"; }') check_result $? "$ts" "OK" "$prot - TXTIME abs" - [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000 + [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail - ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay | + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | awk '/SND/ {snd=$3} /SCHED/ {sch=$3} - END { if (snd - sch > '$((delay/2))') print "OK"; - else print snd, "-", sch, "<", '$((delay/2))'; }') - check_result $? "$ts" "OK" "$prot - TXTIME rel" + END { if (snd - sch > 500) print "OK"; + else print snd, "-", sch, "<", 500; }') + check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail done done diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c03151e7791c..c159230c9b62 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -49,7 +49,7 @@ test_dup_vxlan_self() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -59,7 +59,7 @@ test_dup_vxlan_master() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -79,7 +79,7 @@ test_dup_macvlan_master() ip_link_add br up type bridge vlan_filtering 1 ip_link_add dd up type dummy ip_link_add mv up link dd type macvlan mode passthru - ip_link_master mv br + ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 1d58b3b87465..847936363a12 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -291,6 +291,37 @@ fib_rule6_test() "$getnomatch" "iif dscp redirect to table" \ "iif dscp no redirect to table" fi + + fib_check_iproute_support "flowlabel" "flowlabel" + if [ $? -eq 0 ]; then + match="flowlabel 0xfffff" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel redirect to table" \ + "flowlabel no redirect to table" + + match="flowlabel 0xfffff" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel redirect to table" \ + "iif flowlabel no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel masked redirect to table" \ + "flowlabel masked no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel masked redirect to table" \ + "iif flowlabel masked no redirect to table" + fi } fib_rule6_vrf_test() diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 7d885cff8d79..00bde7b6f39e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_reserved.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1c8a26046589..2b5700b61ffa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change" NUM_NETIFS=4 source lib.sh @@ -77,12 +77,16 @@ cleanup() ping_ipv4() { - ping_test $h1 192.0.2.2 + local msg=$1 + + ping_test $h1 192.0.2.2 "$msg" } ping_ipv6() { - ping6_test $h1 2001:db8:1::2 + local msg=$1 + + ping6_test $h1 2001:db8:1::2 "$msg" } learning() @@ -95,6 +99,21 @@ flooding() flood_test $swp2 $h1 $h2 } +pvid_change() +{ + # Test that the changing of the VLAN-aware PVID does not affect + # VLAN-unaware forwarding + bridge vlan add vid 3 dev $swp1 pvid untagged + + ping_ipv4 " with bridge port $swp1 PVID changed" + ping_ipv6 " with bridge port $swp1 PVID changed" + + bridge vlan del vid 3 dev $swp1 + + ping_ipv4 " with bridge port $swp1 PVID deleted" + ping_ipv6 " with bridge port $swp1 PVID deleted" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7337f398f9cc..8de80acf249e 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -68,6 +68,7 @@ declare -A NETIFS=( : "${REQUIRE_JQ:=yes}" : "${REQUIRE_MZ:=yes}" : "${REQUIRE_MTOOLS:=no}" +: "${REQUIRE_TEAMD:=no}" # Whether to override MAC addresses on interfaces participating in the test. : "${STABLE_MAC_ADDRS:=no}" @@ -321,6 +322,9 @@ fi if [[ "$REQUIRE_MZ" = "yes" ]]; then require_command $MZ fi +if [[ "$REQUIRE_TEAMD" = "yes" ]]; then + require_command $TEAMD +fi if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then # https://github.com/troglobit/mtools require_command msend @@ -932,13 +936,6 @@ packets_rate() echo $(((t1 - t0) / interval)) } -mac_get() -{ - local if_name=$1 - - ip -j link show dev $if_name | jq -r '.[]["address"]' -} - ether_addr_to_u64() { local addr="$1" diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index c35548767756..ecd34f364125 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -7,7 +7,6 @@ ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes -REQUIRE_MZ=no source lib.sh diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index fe4d7c906a70..a20d22d1df36 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -49,6 +49,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 1261e6f46e34..ff7049582d35 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -53,6 +53,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh index e064b946e821..16583a470ec3 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh @@ -109,6 +109,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 " +REQUIRE_TEAMD="yes" NUM_NETIFS=8 source lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh index f05ffe213c46..2a4cd1af1b85 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh @@ -76,6 +76,7 @@ ping_ipv4 ping_ipv6 "} +REQUIRE_TEAMD="yes" NUM_NETIFS=8 : ${lib_dir:=.} source $lib_dir/lib.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh new file mode 100755 index 000000000000..46c31794b91b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -0,0 +1,352 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|--------------------------------+ +# | SW | | +# | +--|------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +---------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +--|----------------------------------+ +# | +# +--|----------------------------------+ +# | | | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# | VRP2 (vrf) | +# +-------------------------------------+ + +: ${VXPORT:=4789} +: ${ALL_TESTS:=" + default_test + plain_test + reserved_0_test + reserved_10_test + reserved_31_test + reserved_56_test + reserved_63_test + "} + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + + tc qdisc add dev $h1 clsact + defer tc qdisc del dev $h1 clsact + + tc filter add dev $h1 ingress pref 77 \ + prot ip flower skip_hw ip_proto icmp action drop + defer tc filter del dev $h1 ingress pref 77 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip_link_set_addr br1 $(mac_get $swp1) + ip_link_set_up br1 + + ip_link_set_up $rp1 + ip_addr_add $rp1 192.0.2.17/28 + ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + + ip_link_set_master $swp1 br1 + ip_link_set_up $swp1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + defer simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + h1_create + switch_create + + vrp2_create +} + +vxlan_header_bytes() +{ + local vni=$1; shift + local -a extra_bits=("$@") + local -a bits + local i + + for ((i=0; i < 64; i++)); do + bits[i]=0 + done + + # Bit 4 is the I flag and is always on. + bits[4]=1 + + for i in ${extra_bits[@]}; do + bits[i]=1 + done + + # Bits 32..55 carry the VNI + local mask=0x800000 + for ((i=0; i < 24; i++)); do + bits[$((i + 32))]=$(((vni & mask) != 0)) + ((mask >>= 1)) + done + + local bytes + for ((i=0; i < 8; i++)); do + local byte=0 + local j + for ((j=0; j < 8; j++)); do + local bit=${bits[8 * i + j]} + ((byte += bit << (7 - j))) + done + bytes+=$(printf %02x $byte): + done + + echo ${bytes%:} +} + +neg_bytes() +{ + local bytes=$1; shift + + local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8 + [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:) + local out + local i + + for ((i=0; i < ${#bytes}; i++)); do + local c=${bytes:$i:1} + out+=${neg[$c]} + done + echo $out +} + +vxlan_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local vni=$1; shift + local reserved_bits=$1; shift + + local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits) + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp sp=23456,dp=$VXPORT,p=$(: + )"$vxlan_header:"$( : VXLAN + )"$dest_mac:"$( : ETH daddr + )"00:11:22:33:44:55:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr + )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request seq. number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} + +vxlan_device_add() +{ + ip_link_add vx1 up type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 "$@" + ip_link_set_master vx1 br1 +} + +vxlan_all_reserved_bits() +{ + local i + + for ((i=0; i < 64; i++)); do + if ((i == 4 || i >= 32 && i < 56)); then + continue + fi + echo $i + done +} + +vxlan_ping_vanilla() +{ + vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000 +} + +vxlan_ping_reserved() +{ + for bit in $(vxlan_all_reserved_bits); do + vxlan_ping_do 1 $rp2 $(mac_get $rp1) \ + 192.0.2.17 $(mac_get $h1) 1000 "$bit" + ((n++)) + done +} + +vxlan_ping_test() +{ + local what=$1; shift + local get_stat=$1; shift + local expect=$1; shift + + RET=0 + + local t0=$($get_stat) + + "$@" + check_err $? "Failure when running $@" + + local t1=$($get_stat) + local delta=$((t1 - t0)) + + ((expect == delta)) + check_err $? "Expected to capture $expect packets, got $delta." + + log_test "$what" +} + +__default_test_do() +{ + local n_allowed_bits=$1; shift + local what=$1; shift + + vxlan_ping_test "$what: clean packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + 10 vxlan_ping_vanilla + + local t0=$(link_stats_get vx1 rx errors) + vxlan_ping_test "$what: mangled packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + $n_allowed_bits vxlan_ping_reserved + local t1=$(link_stats_get vx1 rx errors) + + RET=0 + local expect=$((39 - n_allowed_bits)) + local delta=$((t1 - t0)) + ((expect == delta)) + check_err $? "Expected $expect error packets, got $delta." + log_test "$what: drops reported" +} + +default_test_do() +{ + vxlan_device_add + __default_test_do 0 "Default" +} + +default_test() +{ + in_defer_scope \ + default_test_do +} + +plain_test_do() +{ + vxlan_device_add reserved_bits 0xf7ffffff000000ff + __default_test_do 0 "reserved_bits 0xf7ffffff000000ff" +} + +plain_test() +{ + in_defer_scope \ + plain_test_do +} + +reserved_test() +{ + local bit=$1; shift + + local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit) + local reserved_bytes=$(neg_bytes $allowed_bytes) + local reserved_bits=${reserved_bytes//:/} + + vxlan_device_add reserved_bits 0x$reserved_bits + __default_test_do 1 "reserved_bits 0x$reserved_bits" +} + +reserved_0_test() +{ + in_defer_scope \ + reserved_test 0 +} + +reserved_10_test() +{ + in_defer_scope \ + reserved_test 10 +} + +reserved_31_test() +{ + in_defer_scope \ + reserved_test 31 +} + +reserved_56_test() +{ + in_defer_scope \ + reserved_test 56 +} + +reserved_63_test() +{ + in_defer_scope \ + reserved_test 63 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index be4a30a0d02a..9b44a091802c 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, attr->rta_len = RTA_LENGTH(size); attr->rta_type = rta_type; - memcpy(RTA_DATA(attr), payload, size); + if (payload) + memcpy(RTA_DATA(attr), payload, size); return 0; } diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 8994fec1c38f..0bd9a038a1f0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -435,6 +435,13 @@ xfail_on_veth() fi } +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + kill_process() { local pid=$1; shift @@ -451,7 +458,7 @@ ip_link_add() defer ip link del dev "$name" } -ip_link_master() +ip_link_set_master() { local member=$1; shift local master=$1; shift @@ -459,3 +466,62 @@ ip_link_master() ip link set dev "$member" master "$master" defer ip link set dev "$member" nomaster } + +ip_link_set_addr() +{ + local name=$1; shift + local addr=$1; shift + + local old_addr=$(mac_get "$name") + ip link set dev "$name" address "$addr" + defer ip link set dev "$name" address "$old_addr" +} + +ip_link_is_up() +{ + local name=$1; shift + + local state=$(ip -j link show "$name" | + jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') + [[ $state == "UP" ]] +} + +ip_link_set_up() +{ + local name=$1; shift + + if ! ip_link_is_up "$name"; then + ip link set dev "$name" up + defer ip link set dev "$name" down + fi +} + +ip_link_set_down() +{ + local name=$1; shift + + if ip_link_is_up "$name"; then + ip link set dev "$name" down + defer ip link set dev "$name" up + fi +} + +ip_addr_add() +{ + local name=$1; shift + + ip addr add dev "$name" "$@" + defer ip addr del dev "$name" "$@" +} + +ip_route_add() +{ + ip route add "$@" + defer ip route del "$@" +} + +bridge_vlan_add() +{ + bridge vlan add "$@" + defer bridge vlan del "$@" +} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 18b9443454a9..bc6b6762baf3 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 477ae76de93d..3efe005436cd 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""): _fail("Check failed", a, "not in", b, comment) +def ksft_is(a, b, comment=""): + if a is not b: + _fail("Check failed", a, "is not", b, comment) + + def ksft_ge(a, b, comment=""): if a < b: _fail("Check failed", a, "<", b, comment) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 72590c3f90f1..9e3bcddcf3e8 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -10,7 +10,9 @@ import time class CmdExitFailure(Exception): - pass + def __init__(self, msg, cmd_obj): + super().__init__(msg) + self.cmd = cmd_obj class cmd: @@ -48,7 +50,7 @@ class cmd: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + (self.proc.args, stdout, stderr), self) class bkg(cmd): diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index a0d689d58c57..ad1e36baee2a 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -13,14 +13,14 @@ try: SPEC_PATH = KSFT_DIR / "net/lib/specs" sys.path.append(tools_full_path.as_posix()) - from net.lib.ynl.lib import YnlFamily, NlError + from net.lib.ynl.pyynl.lib import YnlFamily, NlError else: # Running in tree tools_full_path = KSRC / "tools" SPEC_PATH = KSRC / "Documentation/netlink/specs" sys.path.append(tools_full_path.as_posix()) - from net.ynl.lib import YnlFamily, NlError + from net.ynl.pyynl.lib import YnlFamily, NlError except ModuleNotFoundError as e: ksft_pr("Failed importing `ynl` library from kernel sources") ksft_pr(str(e)) @@ -32,23 +32,23 @@ except ModuleNotFoundError as e: # Set schema='' to avoid jsonschema validation, it's slow # class EthtoolFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class RtnlFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class NetshaperFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 8e3fc05a5397..c76525fe2b84 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 4209b9569039..414addef9a45 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <sys/mman.h> +#include <arpa/inet.h> + #include <netdb.h> #include <netinet/in.h> @@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -M | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the MPTCP sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1281,9 +1302,9 @@ again: return ret; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b48b4e56826a..5e3c56253274 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -137,7 +137,7 @@ TEST_GROUP="" #shellcheck disable=SC2317 cleanup() { - rm -f "$cin_disconnect" "$cout_disconnect" + rm -f "$cin_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" @@ -155,7 +155,6 @@ cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) cin_disconnect="$cin".disconnect -cout_disconnect="$cout".disconnect trap cleanup EXIT mptcp_lib_ns_init ns1 ns2 ns3 ns4 @@ -445,12 +444,8 @@ do_transfer() printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" echo cat "$capout" @@ -587,7 +582,7 @@ make_file() mptcp_lib_make_file $name 1024 $ksize dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo "Created $name (size $(du -b "$name")) containing data sent by $who" + echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who" } run_tests_lo() diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c07e2bd3a315..13a3b68181ee 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1039,13 +1039,8 @@ do_transfer() if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then fail_test "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - cat /tmp/${connector_ns}.out - + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" return 1 fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 975d4d4c862a..051e289d7967 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -107,6 +107,27 @@ mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } +# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file +mptcp_lib_pr_err_stats() { + local lns="${1}" + local cns="${2}" + local port="${3}" + local lstat="${4}" + local cstat="${5}" + + echo -en "${MPTCP_LIB_COLOR_RED}" + { + printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}" + ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" + cat "${lstat}" + + printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" + ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" + [ "${lstat}" != "${cstat}" ] && cat "${cstat}" + } 1>&2 + echo -en "${MPTCP_LIB_COLOR_RESET}" +} + # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all # features using the last version of the kernel and the selftests to make sure # a test is not being skipped by mistake. diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 5e8d5b83e2d0..418a903c3a4d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -169,6 +169,11 @@ do_transfer() cmsg+=",TCPINQ" fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + timeout ${timeout_test} \ ip netns exec ${listener_ns} \ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ @@ -189,14 +194,16 @@ do_transfer() wait $spid local rets=$? + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat | grep Tcp > /tmp/${listener_ns}.out + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat | grep Tcp > /tmp/${connector_ns}.out + print_title "Transfer ${ip:2}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \ + "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out" mptcp_lib_result_fail "transfer ${ip}" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 8fa77c8e9b65..9c2a415976cb 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -155,6 +155,11 @@ do_transfer() sleep 1 fi + NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ + nstat -n + NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ + nstat -n + timeout ${timeout_test} \ ip netns exec ${ns3} \ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ @@ -180,25 +185,27 @@ do_transfer() kill ${cappid_connector} fi + NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \ + nstat | grep Tcp > /tmp/${ns3}.out + NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \ + nstat | grep Tcp > /tmp/${ns1}.out + cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? - printf "%-16s" " max $max_time " if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" return 0 fi - mptcp_lib_pr_fail - echo "client exit code $retc, server $rets" 1>&2 - echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 - ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" - echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 - ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + mptcp_lib_pr_fail "client exit code $retc, server $rets" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \ + "/tmp/${ns3}.out" "/tmp/${ns1}.out" ls -l $sin $cout ls -l $cin $sout diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 4485fd7675ed..86ec4e68594d 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -61,9 +61,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0 ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad +# avoid neighbor lookups and enable martian IPv6 pings +ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0 +ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0 + # firewall matches to test [ -n "$iptables" ] && { common='-t raw -A PREROUTING -s 192.168.0.0/16' + common+=' -p icmp --icmp-type echo-request' if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -72,6 +83,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad } [ -n "$ip6tables" ] && { common='-t raw -A PREROUTING -s fec0::/16' + common+=' -p icmpv6 --icmpv6-type echo-request' if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -82,8 +94,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad table inet t { chain c { type filter hook prerouting priority raw; - ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter - ip6 saddr fec0::/16 fib saddr . iif oif exists counter + ip saddr 192.168.0.0/16 icmp type echo-request \ + fib saddr . iif oif exists counter + ip6 saddr fec0::/16 icmpv6 type echo-request \ + fib saddr . iif oif exists counter } } EOF diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93d9d914529b..93e8cb671c3d 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -18,6 +18,23 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def napi_list_check(nf) -> None: + with NetdevSimDev(queue_count=100) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100) + + for q in [50, 0, 99]: + for i in range(4): + nsim.dfs_write("queue_reset", f"{q} {i}") + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100, + comment=f"queue count after reset queue {q} mode {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -89,7 +106,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index cc0bfae2bafa..960e1ab4dd04 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -171,8 +171,10 @@ ovs_add_netns_and_veths () { ovs_add_if "$1" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index 4071c133f29e..ef8b25a606d8 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0 <script>" exit "$KSFT_FAIL" fi -script="$1" +script="$(basename $1)" if [ -z "$(which packetdrill)" ]; then ktap_skip_all "packetdrill not found in PATH" @@ -31,16 +31,32 @@ if [ -z "$(which packetdrill)" ]; then fi declare -a optargs +failfunc=ktap_test_fail + if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then optargs+=('--tolerance_usecs=14000') + + # xfail tests that are known flaky with dbg config, not fixable. + # still run them for coverage (and expect 100% pass without dbg). + declare -ar xfail_list=( + "tcp_eor_no-coalesce-retrans.pkt" + "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_slow_start_slow-start-after-win-update.pkt" + "tcp_timestamping.*.pkt" + "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_epoll_.*.pkt" + "tcp_tcp_info_tcp-info-.*-limited.pkt" + ) + readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" + [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail fi ktap_print_header ktap_set_plan 2 -unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \ - && ktap_test_pass "ipv4" || ktap_test_fail "ipv4" -unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \ - && ktap_test_pass "ipv6" || ktap_test_fail "ipv6" +unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv4" || $failfunc "ipv4" +unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv6" || $failfunc "ipv6" ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..914eabab367a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. +--tolerance_usecs=10000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decideds to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). + +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7> + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). +// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) = 0 + + +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10040B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. This chunk should not be appended to the packets created +// the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesce into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop> +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop> + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop> ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop> +// Enter fast recovery. + +0 > . 1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop> + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop> + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop> + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop> + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. + +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. + +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop> + +0 > . 12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop> + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop> + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop> + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop> + +0 > . 18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 20 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop> + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop> + +0 > . 4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop> + +0 > . 7001:8001(1000) ack 1 + +0 > . 10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7> + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test the MSG_MORE flag will correctly corks the tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 1:1(0) ack 2801 win 257 +// Err... we relase the remaining right after the ACK? note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < . 1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. + +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop> + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop> + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop> + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop> + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop> + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop> + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). +// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop> ++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop> ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) = 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..b2b2cdf27e20 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. +`./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK> + + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700> + +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700> + +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200> + +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800> + +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200> + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100> + + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..183051ba0cae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 <mss 1460> + +0 > S. 0:0(0) ack 1 <mss 1460> + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. + // First verify that the connection is alive: ++3.250 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: + +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index 1803c39dbacb..612a7219990e 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -3,10 +3,9 @@ all: @echo mk_build_dir="$(shell pwd)" > include.sh -TEST_PROGS := run.sh \ - test.py +TEST_PROGS := run.sh -TEST_FILES := include.sh +TEST_FILES := include.sh test.py EXTRA_CLEAN := /tmp/rds_logs include.sh diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7f05b5f9b76f..2e8243a65b50 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -29,6 +29,7 @@ ALL_TESTS=" kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding + kci_test_mngtmpaddr " devdummy="test-dummy0" @@ -44,6 +45,7 @@ check_err() if [ $ret -eq 0 ]; then ret=$1 fi + [ -n "$2" ] && echo "$2" } # same but inverted -- used when command must fail for test to pass @@ -1239,6 +1241,99 @@ kci_test_enslave_bonding() ip netns del "$testns" } +# Called to validate the addresses on $IFNAME: +# +# 1. Every `temporary` address must have a matching `mngtmpaddr` +# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary` +# +# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait +validate_mngtmpaddr() +{ + local dev=$1 + local prefix="" + local addr_list=$(ip -j -n $testns addr show dev ${dev}) + local temp_addrs=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true) | .local') + local mng_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + local undep_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + + # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr + for address in ${temp_addrs}; do + prefix=$(echo ${address} | cut -d: -f1-4) + if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!"; + return 0 + fi + done + + # 2. All mngtmpaddr addresses must have a temporary address (not dep) + for prefix in ${mng_prefixes}; do + if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: No undeprecated temporary in $prefix!"; + return 0 + fi + done + + return 1 +} + +kci_test_mngtmpaddr() +{ + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns" + return $ksft_skip + fi + + # 1. Create a dummy Ethernet interface + run_cmd ip -n $testns link add ${devdummy} type dummy + run_cmd ip -n $testns link set ${devdummy} up + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1 + + # 2. Create several mngtmpaddr addresses on that interface. + # with temp_*_lft configured to be pretty short (10 and 35 seconds + # for prefer/valid respectively) + for i in $(seq 1 9); do + run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy} + done + + # 3. Confirm that a preferred temporary address exists for each mngtmpaddr + # address at all times, polling once per second for 30 seconds. + slowwait 30 validate_mngtmpaddr ${devdummy} + + # 4. Delete each mngtmpaddr address, one at a time (alternating between + # deleting and merely un-mngtmpaddr-ing), and confirm that the other + # mngtmpaddr addresses still have preferred temporaries. + for i in $(seq 1 9); do + (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag="" + if (( $i % 2 == 0 )); then + run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy} + else + run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy} + fi + # the temp addr should be deleted + validate_mngtmpaddr ${devdummy} + done + + if [ $ret -ne 0 ]; then + end_test "FAIL: mngtmpaddr add/remove incorrect" + else + end_test "PASS: mngtmpaddr add/remove correctly" + fi + + ip netns del "$testns" + return $ret +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 1a706d03bb6b..9a85f93c33d8 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -44,9 +44,11 @@ struct tls_crypto_info_keys { }; static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, - struct tls_crypto_info_keys *tls12) + struct tls_crypto_info_keys *tls12, + char key_generation) { - memset(tls12, 0, sizeof(*tls12)); + memset(tls12, key_generation, sizeof(*tls12)); + memset(tls12, 0, sizeof(struct tls_crypto_info)); switch (cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: @@ -275,7 +277,7 @@ TEST_F(tls_basic, recseq_wrap) if (self->notls) SKIP(return, "no TLS support"); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); @@ -391,7 +393,7 @@ FIXTURE_SETUP(tls) SKIP(return, "Unsupported cipher in FIPS mode"); tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); @@ -1175,7 +1177,7 @@ TEST_F(tls, bidir) struct tls_crypto_info_keys tls12; tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, tls12.len); @@ -1614,7 +1616,7 @@ TEST_F(tls, getsockopt) EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); /* get the full crypto_info */ - tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0); len = expect.len; memrnd(&get, sizeof(get)); EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); @@ -1668,6 +1670,464 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 +/* key_update, length 1, update_not_requested */ +static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; +static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) +{ + size_t len = sizeof(key_update_msg); + + EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, + (char *)key_update_msg, len, 0), + len); +} + +static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) +{ + char buf[100]; + + EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), + sizeof(key_update_msg)); + EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); +} + +/* set the key to 0 then 1 for RX, immediately to 1 for TX */ +TEST_F(tls_basic, rekey_rx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +/* set the key to 0 then 1 for TX, immediately to 1 for RX */ +TEST_F(tls_basic, rekey_tx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +TEST_F(tls, rekey) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* send after rekey */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_fail) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + + if (variant->tls_version != TLS_1_3_VERSION) { + /* just check that rekey is not supported and return */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EBUSY); + return; + } + + /* successful update */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* invalid update: change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update (RX socket): change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update: change of cipher */ + if (variant->cipher_type == TLS_CIPHER_AES_GCM_256) + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); + else + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* send after rekey, the invalid updates shouldn't have an effect */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_peek) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); +} + +TEST_F(tls, splice_rekey) +{ + int send_len = TLS_PAYLOAD_MAX_LEN / 2; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + struct tls_crypto_info_keys tls12; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + /* can't splice the KeyUpdate */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EINVAL); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* can't splice before updating the key */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, rekey_peek_splice) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + ASSERT_GE(pipe(p), 0); + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); +} + +TEST_F(tls, rekey_getsockopt) +{ + struct tls_crypto_info_keys tls12; + struct tls_crypto_info_keys tls12_get; + socklen_t len; + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + tls_recv_keyupdate(_metadata, self->cfd, 0); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); +} + +TEST_F(tls, rekey_poll_pending) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* send immediately after rekey */ + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + /* key hasn't been updated, expect cfd to be non-readable */ + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + + exit(!__test_passed(_metadata)); + } +} + +TEST_F(tls, rekey_poll_delay) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + sleep(1); + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + exit(!__test_passed(_metadata)); + } +} + FIXTURE(tls_err) { int fd, cfd; @@ -1696,7 +2156,7 @@ FIXTURE_SETUP(tls_err) int ret; tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); @@ -2118,7 +2578,7 @@ TEST(tls_v6ops) { int sfd, ret, fd; socklen_t len, len2; - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); addr.sin6_family = AF_INET6; addr.sin6_addr = in6addr_any; @@ -2177,7 +2637,7 @@ TEST(prequeue) { len = sizeof(addr); memrnd(buf, sizeof(buf)); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 640bc43452fa..88fa1d53ba2b 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -92,6 +92,9 @@ run_udp() { echo "udp" run_in_netns ${args} + echo "udp sendmmsg" + run_in_netns ${args} -m + echo "udp gso" run_in_netns ${args} -S 0 diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh new file mode 100755 index 000000000000..e7cb8c678bde --- /dev/null +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_binding_on + test_binding_off + test_binding_toggle_on + test_binding_toggle_off + test_binding_toggle_on_when_upper_down + test_binding_toggle_off_when_upper_down + test_binding_toggle_on_when_lower_down + test_binding_toggle_off_when_lower_down +" + +setup_prepare() +{ + local port + + ip_link_add br up type bridge vlan_filtering 1 + + for port in d1 d2 d3; do + ip_link_add $port type veth peer name r$port + ip_link_set_up $port + ip_link_set_up r$port + ip_link_set_master $port br + done + + bridge_vlan_add vid 11 dev br self + bridge_vlan_add vid 11 dev d1 master + + bridge_vlan_add vid 12 dev br self + bridge_vlan_add vid 12 dev d2 master + + bridge_vlan_add vid 13 dev br self + bridge_vlan_add vid 13 dev d1 master + bridge_vlan_add vid 13 dev d2 master + + bridge_vlan_add vid 14 dev br self + bridge_vlan_add vid 14 dev d1 master + bridge_vlan_add vid 14 dev d2 master + bridge_vlan_add vid 14 dev d3 master +} + +operstate_is() +{ + local dev=$1; shift + local expect=$1; shift + + local operstate=$(ip -j link show $dev | jq -r .[].operstate) + if [[ $operstate == UP ]]; then + operstate=1 + elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then + operstate=0 + fi + echo -n $operstate + [[ $operstate == $expect ]] +} + +check_operstate() +{ + local dev=$1; shift + local expect=$1; shift + local operstate + + operstate=$(busywait 1000 \ + operstate_is "$dev" "$expect") + check_err $? "Got operstate of $operstate, expected $expect" +} + +add_one_vlan() +{ + local link=$1; shift + local id=$1; shift + + ip_link_add $link.$id link $link type vlan id $id "$@" +} + +add_vlans() +{ + add_one_vlan br 11 "$@" + add_one_vlan br 12 "$@" + add_one_vlan br 13 "$@" + add_one_vlan br 14 "$@" +} + +set_vlans() +{ + ip link set dev br.11 "$@" + ip link set dev br.12 "$@" + ip link set dev br.13 "$@" + ip link set dev br.14 "$@" +} + +down_netdevs() +{ + local dev + + for dev in "$@"; do + ip_link_set_down $dev + done +} + +check_operstates() +{ + local opst_11=$1; shift + local opst_12=$1; shift + local opst_13=$1; shift + local opst_14=$1; shift + + check_operstate br.11 $opst_11 + check_operstate br.12 $opst_12 + check_operstate br.13 $opst_13 + check_operstate br.14 $opst_14 +} + +do_test_binding() +{ + local inject=$1; shift + local what=$1; shift + local opsts_d1=$1; shift + local opsts_d2=$1; shift + local opsts_d12=$1; shift + local opsts_d123=$1; shift + + RET=0 + + defer_scope_push + down_netdevs d1 + $inject + check_operstates $opsts_d1 + defer_scope_pop + + defer_scope_push + down_netdevs d2 + $inject + check_operstates $opsts_d2 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 + $inject + check_operstates $opsts_d12 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 d3 + $inject + check_operstates $opsts_d123 + defer_scope_pop + + log_test "Test bridge_binding $what" +} + +do_test_binding_on() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "0 1 1 1" \ + "1 0 1 1" \ + "0 0 0 1" \ + "0 0 0 0" +} + +do_test_binding_off() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "1 1 1 1" \ + "1 1 1 1" \ + "1 1 1 1" \ + "0 0 0 0" +} + +test_binding_on() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_on : "on" +} + +test_binding_off() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_off : "off" +} + +test_binding_toggle_on() +{ + add_vlans bridge_binding off + set_vlans up + set_vlans type vlan bridge_binding on + do_test_binding_on : "off->on" +} + +test_binding_toggle_off() +{ + add_vlans bridge_binding on + set_vlans up + set_vlans type vlan bridge_binding off + do_test_binding_off : "on->off" +} + +dfr_set_binding_on() +{ + set_vlans type vlan bridge_binding on + defer set_vlans type vlan bridge_binding off +} + +dfr_set_binding_off() +{ + set_vlans type vlan bridge_binding off + defer set_vlans type vlan bridge_binding on +} + +test_binding_toggle_on_when_lower_down() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_on dfr_set_binding_on "off->on when lower down" +} + +test_binding_toggle_off_when_lower_down() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_off dfr_set_binding_off "on->off when lower down" +} + +test_binding_toggle_on_when_upper_down() +{ + add_vlans bridge_binding off + set_vlans type vlan bridge_binding on + set_vlans up + do_test_binding_on : "off->on when upper down" +} + +test_binding_toggle_off_when_upper_down() +{ + add_vlans bridge_binding on + set_vlans type vlan bridge_binding off + set_vlans up + do_test_binding_off : "on->off when upper down" +} + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index d43afe243779..12e7cae251be 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -31,7 +31,8 @@ $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ - $(top_srcdir)/tools/net/ynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \ + $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \ $(top_srcdir)/tools/net/ynl/lib/*.[ado] \ $(OUTPUT)/.libynl-*.sig \ $(OUTPUT)/libynl.a diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile new file mode 100644 index 000000000000..3e84e26341d1 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -0,0 +1,2 @@ +TEST_PROGS = set_pcie_cooling_state.sh +include ../lib.mk diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh new file mode 100755 index 000000000000..9df606552af3 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 +skipmsg="skip all tests:" + +PCIEPORTTYPE="PCIe_Port_Link_Speed" + +prerequisite() +{ + local ports + + if [ $UID != 0 ]; then + echo $skipmsg must be run as root >&2 + exit $ksft_skip + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $skipmsg sysfs is not mounted >&2 + exit $ksft_skip + fi + + if ! ls $SYSFS/class/thermal/cooling_device* > /dev/null 2>&1; then + echo $skipmsg thermal cooling devices missing >&2 + exit $ksft_skip + fi + + ports=`grep -e "^$PCIEPORTTYPE" $SYSFS/class/thermal/cooling_device*/type | wc -l` + if [ $ports -eq 0 ]; then + echo $skipmsg pcie cooling devices missing >&2 + exit $ksft_skip + fi +} + +testport= +find_pcie_port() +{ + local patt="$1" + local pcieports + local max + local cur + local delta + local bestdelta=-1 + + pcieports=`grep -l -F -e "$patt" /sys/class/thermal/cooling_device*/type` + if [ -z "$pcieports" ]; then + return + fi + pcieports=${pcieports//\/type/} + # Find the port with the highest PCIe Link Speed + for port in $pcieports; do + max=`cat $port/max_state` + cur=`cat $port/cur_state` + delta=$((max-cur)) + if [ $delta -gt $bestdelta ]; then + testport="$port" + bestdelta=$delta + fi + done +} + +sysfspcidev= +find_sysfs_pci_dev() +{ + local typefile="$1/type" + local pcidir + + pcidir="$SYSFS/bus/pci/devices/`sed -e "s|^${PCIEPORTTYPE}_||g" $typefile`" + + if [ -r "$pcidir/current_link_speed" ]; then + sysfspcidev="$pcidir/current_link_speed" + fi +} + +usage() +{ + echo "Usage $0 [ -d dev ]" + echo -e "\t-d: PCIe port BDF string (e.g., 0000:00:04.0)" +} + +pattern="$PCIEPORTTYPE" +parse_arguments() +{ + while getopts d:h opt; do + case $opt in + h) + usage "$0" + exit 0 + ;; + d) + pattern="$PCIEPORTTYPE_$OPTARG" + ;; + *) + usage "$0" + exit 0 + ;; + esac + done +} + +parse_arguments "$@" +prerequisite +find_pcie_port "$pattern" +if [ -z "$testport" ]; then + echo $skipmsg "pcie cooling device not found from sysfs" >&2 + exit $ksft_skip +fi +find_sysfs_pci_dev "$testport" +if [ -z "$sysfspcidev" ]; then + echo $skipmsg "PCIe port device not found from sysfs" >&2 + exit $ksft_skip +fi + +./set_pcie_speed.sh "$testport" "$sysfspcidev" +retval=$? + +exit $retval diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh new file mode 100755 index 000000000000..584596949312 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +set -e + +TESTNAME=set_pcie_speed + +declare -a PCIELINKSPEED=( + "2.5 GT/s PCIe" + "5.0 GT/s PCIe" + "8.0 GT/s PCIe" + "16.0 GT/s PCIe" + "32.0 GT/s PCIe" + "64.0 GT/s PCIe" +) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 + +coolingdev="$1" +statefile="$coolingdev/cur_state" +maxfile="$coolingdev/max_state" +linkspeedfile="$2" + +oldstate=`cat $statefile` +maxstate=`cat $maxfile` + +set_state() +{ + local state=$1 + local linkspeed + local expected_linkspeed + + echo $state > $statefile + + sleep 1 + + linkspeed="`cat $linkspeedfile`" + expected_linkspeed=$((maxstate-state)) + expected_str="${PCIELINKSPEED[$expected_linkspeed]}" + if [ ! "${expected_str}" = "${linkspeed}" ]; then + echo "$TESTNAME failed: expected: ${expected_str}; got ${linkspeed}" + retval=1 + fi +} + +cleanup_skip () +{ + set_state $oldstate + exit $ksft_skip +} + +trap cleanup_skip EXIT + +echo "$TESTNAME: testing states $maxstate .. $oldstate with $coolingdev" +for i in $(seq $maxstate -1 $oldstate); do + set_state "$i" +done + +trap EXIT +if [ $retval -eq 0 ]; then + echo "$TESTNAME [PASS]" +else + echo "$TESTNAME [FAIL]" +fi +exit $retval diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore index 93ab9d7e5b7e..5118f0f3edf4 100644 --- a/tools/testing/selftests/pid_namespace/.gitignore +++ b/tools/testing/selftests/pid_namespace/.gitignore @@ -1 +1,2 @@ +pid_max regression_enomem diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile index 9286a1d22cd3..b972f55d07ae 100644 --- a/tools/testing/selftests/pid_namespace/Makefile +++ b/tools/testing/selftests/pid_namespace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -g $(KHDR_INCLUDES) -TEST_GEN_PROGS = regression_enomem +TEST_GEN_PROGS = regression_enomem pid_max LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c new file mode 100644 index 000000000000..51c414faabb0 --- /dev/null +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -0,0 +1,358 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/types.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" + +#define __STACK_SIZE (8 * 1024 * 1024) +static pid_t do_clone(int (*fn)(void *), void *arg, int flags) +{ + char *stack; + pid_t ret; + + stack = malloc(__STACK_SIZE); + if (!stack) + return -ENOMEM; + +#ifdef __ia64__ + ret = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg); +#else + ret = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg); +#endif + free(stack); + return ret; +} + +static int pid_max_cb(void *data) +{ + int fd, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return -1; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return -1; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return -1; + } + + ret = write(fd, "500", sizeof("500") - 1); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return -1; + } + + for (int i = 0; i < 501; i++) { + pid = fork(); + if (pid == 0) + exit(EXIT_SUCCESS); + wait_for_pid(pid); + if (pid > 500) { + fprintf(stderr, "Managed to create pid number beyond limit\n"); + return -1; + } + } + + return 0; +} + +static int pid_max_nested_inner(void *data) +{ + int fret = -1; + pid_t pids[2]; + int fd, i, ret; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "500", sizeof("500") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + pids[0] = fork(); + if (pids[0] < 0) { + fprintf(stderr, "Failed to create first new process\n"); + return fret; + } + + if (pids[0] == 0) + exit(EXIT_SUCCESS); + + pids[1] = fork(); + wait_for_pid(pids[0]); + if (pids[1] >= 0) { + if (pids[1] == 0) + exit(EXIT_SUCCESS); + wait_for_pid(pids[1]); + + fprintf(stderr, "Managed to create process even though ancestor pid namespace had a limit\n"); + return fret; + } + + /* Now make sure that we wrap pids at 400. */ + for (i = 0; i < 510; i++) { + pid_t pid; + + pid = fork(); + if (pid < 0) + return fret; + + if (pid == 0) + exit(EXIT_SUCCESS); + + wait_for_pid(pid); + if (pid >= 500) { + fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid); + return fret; + } + } + + return 0; +} + +static int pid_max_nested_outer(void *data) +{ + int fret = -1, nr_procs = 400; + pid_t pids[1000]; + int fd, i, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "400", sizeof("400") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + /* + * Create 397 processes. This leaves room for do_clone() (398) and + * one more 399. So creating another process needs to fail. + */ + for (nr_procs = 0; nr_procs < 396; nr_procs++) { + pid = fork(); + if (pid < 0) + goto reap; + + if (pid == 0) + exit(EXIT_SUCCESS); + + pids[nr_procs] = pid; + } + + pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); + if (pid < 0) { + fprintf(stderr, "%m - Failed to clone nested pidns\n"); + goto reap; + } + + if (wait_for_pid(pid)) { + fprintf(stderr, "%m - Nested pid_max failed\n"); + goto reap; + } + + fret = 0; + +reap: + for (int i = 0; i < nr_procs; i++) + wait_for_pid(pids[i]); + + return fret; +} + +static int pid_max_nested_limit_inner(void *data) +{ + int fret = -1, nr_procs = 400; + int fd, ret; + pid_t pid; + pid_t pids[1000]; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return fret; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return fret; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return fret; + } + + ret = write(fd, "500", sizeof("500") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return fret; + } + + for (nr_procs = 0; nr_procs < 500; nr_procs++) { + pid = fork(); + if (pid < 0) + break; + + if (pid == 0) + exit(EXIT_SUCCESS); + + pids[nr_procs] = pid; + } + + if (nr_procs >= 400) { + fprintf(stderr, "Managed to create processes beyond the configured outer limit\n"); + goto reap; + } + + fret = 0; + +reap: + for (int i = 0; i < nr_procs; i++) + wait_for_pid(pids[i]); + + return fret; +} + +static int pid_max_nested_limit_outer(void *data) +{ + int fd, ret; + pid_t pid; + + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); + if (ret) { + fprintf(stderr, "%m - Failed to make rootfs private mount\n"); + return -1; + } + + umount2("/proc", MNT_DETACH); + + ret = mount("proc", "/proc", "proc", 0, NULL); + if (ret) { + fprintf(stderr, "%m - Failed to mount proc\n"); + return -1; + } + + fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); + if (fd < 0) { + fprintf(stderr, "%m - Failed to open pid_max\n"); + return -1; + } + + ret = write(fd, "400", sizeof("400") - 1); + close(fd); + if (ret < 0) { + fprintf(stderr, "%m - Failed to write pid_max\n"); + return -1; + } + + pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS); + if (pid < 0) { + fprintf(stderr, "%m - Failed to clone nested pidns\n"); + return -1; + } + + if (wait_for_pid(pid)) { + fprintf(stderr, "%m - Nested pid_max failed\n"); + return -1; + } + + return 0; +} + +TEST(pid_max_simple) +{ + pid_t pid; + + + pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST(pid_max_nested_limit) +{ + pid_t pid; + + pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST(pid_max_nested) +{ + pid_t pid; + + pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS); + ASSERT_GT(pid, 0); + ASSERT_EQ(0, wait_for_pid(pid)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 973198a3ec3d..bf92481f925c 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -6,3 +6,5 @@ pidfd_wait pidfd_fdinfo_test pidfd_getfd_test pidfd_setns_test +pidfd_file_handle_test +pidfd_bind_mount diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index d731e3e76d5b..301343a11b62 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -2,7 +2,8 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ - pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test + pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ + pidfd_file_handle_test pidfd_bind_mount include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 88d6830ee004..0b96ac4b8ce5 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -12,11 +12,11 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> -#include <sys/mount.h> #include <sys/types.h> #include <sys/wait.h> #include "../kselftest.h" +#include "../clone3/clone3_selftests.h" #ifndef P_PIDFD #define P_PIDFD 3 @@ -68,6 +68,11 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 +static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options) +{ + return syscall(__NR_waitid, which, pid, info, options, NULL); +} + static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -114,4 +119,37 @@ static inline int sys_memfd_create(const char *name, unsigned int flags) return syscall(__NR_memfd_create, name, flags); } +static inline pid_t create_child(int *pidfd, unsigned flags) +{ + struct __clone_args args = { + .flags = CLONE_PIDFD | flags, + .exit_signal = SIGCHLD, + .pidfd = ptr_to_u64(pidfd), + }; + + return sys_clone3(&args, sizeof(struct __clone_args)); +} + +static inline ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = read(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static inline ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c new file mode 100644 index 000000000000..7822dd080258 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Christian Brauner <brauner@kernel.org> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #elif defined __ia64__ + #define __NR_move_mount (428 + 1024) + #else + #define __NR_move_mount 429 + #endif +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +FIXTURE(pidfd_bind_mount) { + char template[PATH_MAX]; + int fd_tmp; + int pidfd; + struct stat st1; + struct stat st2; + __u32 gen1; + __u32 gen2; + bool must_unmount; +}; + +FIXTURE_SETUP(pidfd_bind_mount) +{ + self->fd_tmp = -EBADF; + self->must_unmount = false; + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_LE(snprintf(self->template, PATH_MAX, "%s", P_tmpdir "/pidfd_bind_mount_XXXXXX"), PATH_MAX); + self->fd_tmp = mkstemp(self->template); + ASSERT_GE(self->fd_tmp, 0); + self->pidfd = sys_pidfd_open(getpid(), 0); + ASSERT_GE(self->pidfd, 0); + ASSERT_GE(fstat(self->pidfd, &self->st1), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen1), 0); +} + +FIXTURE_TEARDOWN(pidfd_bind_mount) +{ + ASSERT_EQ(close(self->fd_tmp), 0); + if (self->must_unmount) + ASSERT_EQ(umount2(self->template, 0), 0); + ASSERT_EQ(unlink(self->template), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy. + */ +TEST_F(pidfd_bind_mount, bind_mount) +{ + int fd_tree; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + ASSERT_EQ(close(fd_tree), 0); +} + +/* Test that a pidfd can be reopened through procfs. */ +TEST_F(pidfd_bind_mount, reopen) +{ + int pidfd; + char proc_path[PATH_MAX]; + + sprintf(proc_path, "/proc/self/fd/%d", self->pidfd); + pidfd = open(proc_path, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_GE(fstat(self->pidfd, &self->st2), 0); + ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that a detached mount can be created for a pidfd and then + * attached to the filesystem hierarchy and reopened. + */ +TEST_F(pidfd_bind_mount, bind_mount_reopen) +{ + int fd_tree, fd_pidfd_mnt; + + fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); + ASSERT_GE(fd_tree, 0); + + ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0); + self->must_unmount = true; + + fd_pidfd_mnt = openat(-EBADF, self->template, O_RDONLY | O_NOCTTY | O_CLOEXEC); + ASSERT_GE(fd_pidfd_mnt, 0); + + ASSERT_GE(fstat(fd_tree, &self->st2), 0); + ASSERT_EQ(ioctl(fd_pidfd_mnt, FS_IOC_GETVERSION, &self->gen2), 0); + + ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino); + ASSERT_TRUE(self->gen1 == self->gen2); + + ASSERT_EQ(close(fd_tree), 0); + ASSERT_EQ(close(fd_pidfd_mnt), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c new file mode 100644 index 000000000000..439b9c6c0457 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(file_handle) +{ + pid_t pid; + int pidfd; + + pid_t child_pid1; + int child_pidfd1; + + pid_t child_pid2; + int child_pidfd2; + + pid_t child_pid3; + int child_pidfd3; +}; + +FIXTURE_SETUP(file_handle) +{ + int ret; + int ipc_sockets[2]; + char c; + + self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + ASSERT_GE(self->pidfd, 0); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + EXPECT_GE(self->child_pid1, 0); + + if (self->child_pid1 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid2, 0); + + if (self->child_pid2 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); + + ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + EXPECT_EQ(ret, 0); + + self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid3, 0); + + if (self->child_pid3 == 0) { + close(ipc_sockets[0]); + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + _exit(EXIT_FAILURE); + + close(ipc_sockets[1]); + + pause(); + _exit(EXIT_SUCCESS); + } + + close(ipc_sockets[1]); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + close(ipc_sockets[0]); +} + +FIXTURE_TEARDOWN(file_handle) +{ + EXPECT_EQ(close(self->pidfd), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0); + if (self->child_pidfd1 >= 0) + EXPECT_EQ(0, close(self->child_pidfd1)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0); + if (self->child_pidfd2 >= 0) + EXPECT_EQ(0, close(self->child_pidfd2)); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); + + if (self->child_pidfd3 >= 0) { + EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(0, close(self->child_pidfd3)); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + } +} + +/* + * Test that we can decode a pidfs file handle in the same pid + * namespace. + */ +TEST_F(file_handle, file_handle_same_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle from a child pid + * namespace. + */ +TEST_F(file_handle, file_handle_child_pidns) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + free(fh); +} + +/* + * Test that we fail to decode a pidfs file handle from an ancestor + * child pid namespace. + */ +TEST_F(file_handle, file_handle_foreign_pidns) +{ + int mnt_id; + struct file_handle *fh; + pid_t pid; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->pidfd, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(setns(self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int pidfd = open_by_handle_at(self->pidfd, fh, 0); + if (pidfd >= 0) { + TH_LOG("Managed to open pidfd outside of the caller's pid namespace hierarchy"); + _exit(1); + } + _exit(0); + } + + ASSERT_EQ(wait_for_pid(pid), 0); + + free(fh); +} + +/* + * Test that we can decode a pidfs file handle of a process that has + * exited but not been reaped. + */ +TEST_F(file_handle, pid_has_exited) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED | WNOWAIT), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); +} + +/* + * Test that we fail to decode a pidfs file handle of a process that has + * already been reaped. + */ +TEST_F(file_handle, pid_has_been_reaped) +{ + int mnt_id, pidfd, child_pidfd3; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + child_pidfd3 = self->child_pidfd3; + self->child_pidfd3 = -EBADF; + EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0); + EXPECT_EQ(close(child_pidfd3), 0); + EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_LT(pidfd, 0); +} + +/* + * Test valid flags to open a pidfd file handle. Note, that + * PIDFD_NONBLOCK is defined as O_NONBLOCK and O_NONBLOCK is an alias to + * O_NDELAY. Also note that PIDFD_THREAD is an alias for O_EXCL. + */ +TEST_F(file_handle, open_by_handle_at_valid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, + O_RDONLY | + O_WRONLY | + O_RDWR | + O_NONBLOCK | + O_NDELAY | + O_CLOEXEC | + O_EXCL); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +/* + * Test that invalid flags passed to open a pidfd file handle are + * rejected. + */ +TEST_F(file_handle, open_by_handle_at_invalid_flags) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + static const struct invalid_pidfs_file_handle_flags { + int oflag; + const char *oflag_name; + } invalid_pidfs_file_handle_flags[] = { + { FASYNC, "FASYNC" }, + { O_CREAT, "O_CREAT" }, + { O_NOCTTY, "O_NOCTTY" }, + { O_CREAT, "O_CREAT" }, + { O_TRUNC, "O_TRUNC" }, + { O_APPEND, "O_APPEND" }, + { O_SYNC, "O_SYNC" }, + { O_DSYNC, "O_DSYNC" }, + { O_DIRECT, "O_DIRECT" }, + { O_DIRECTORY, "O_DIRECTORY" }, + { O_NOFOLLOW, "O_NOFOLLOW" }, + { O_NOATIME, "O_NOATIME" }, + { O_PATH, "O_PATH" }, + { O_TMPFILE, "O_TMPFILE" }, + /* + * O_LARGEFILE is added implicitly by + * open_by_handle_at() so pidfs simply masks it off. + */ + }; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + for (int i = 0; i < ARRAY_SIZE(invalid_pidfs_file_handle_flags); i++) { + pidfd = open_by_handle_at(self->pidfd, fh, invalid_pidfs_file_handle_flags[i].oflag); + ASSERT_LT(pidfd, 0) { + TH_LOG("open_by_handle_at() succeeded with invalid flags: %s", invalid_pidfs_file_handle_flags[i].oflag_name); + } + } +} + +/* Test that lookup fails. */ +TEST_F(file_handle, lookup_must_fail) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, ENOTDIR); + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, 0), 0); + ASSERT_EQ(errno, ENOTDIR); +} + +#ifndef AT_HANDLE_CONNECTABLE +#define AT_HANDLE_CONNECTABLE 0x002 +#endif + +/* + * Test that AT_HANDLE_CONNECTABLE is rejected. Connectable file handles + * don't make sense for pidfs. Note that currently AT_HANDLE_CONNECTABLE + * is rejected because it is incompatible with AT_EMPTY_PATH which is + * required with pidfds as we don't support lookup. + */ +TEST_F(file_handle, invalid_name_to_handle_at_flags) +{ + int mnt_id; + struct file_handle *fh; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_NE(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_CONNECTABLE), 0); +} + +#ifndef AT_HANDLE_FID +#define AT_HANDLE_FID 0x200 +#endif + +/* + * Test that a request with AT_HANDLE_FID always leads to decodable file + * handle as pidfs always provides export operations. + */ +TEST_F(file_handle, valid_name_to_handle_at_flags) +{ + int mnt_id, pidfd; + struct file_handle *fh; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_FID), 0); + + ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7c2a4349170a..222f8131283b 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -19,7 +19,6 @@ #include <linux/ioctl.h> #include "pidfd.h" -#include "../clone3/clone3_selftests.h" #include "../kselftest_harness.h" #ifndef PIDFS_IOCTL_MAGIC @@ -118,22 +117,6 @@ FIXTURE(current_nsset) int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; }; -static int sys_waitid(int which, pid_t pid, int options) -{ - return syscall(__NR_waitid, which, pid, NULL, options, NULL); -} - -pid_t create_child(int *pidfd, unsigned flags) -{ - struct __clone_args args = { - .flags = CLONE_PIDFD | flags, - .exit_signal = SIGCHLD, - .pidfd = ptr_to_u64(pidfd), - }; - - return sys_clone3(&args, sizeof(struct clone_args)); -} - static bool switch_timens(void) { int fd, ret; @@ -150,28 +133,6 @@ static bool switch_timens(void) return ret == 0; } -static ssize_t read_nointr(int fd, void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = read(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - -static ssize_t write_nointr(int fd, const void *buf, size_t count) -{ - ssize_t ret; - - do { - ret = write(fd, buf, count); - } while (ret < 0 && errno == EINTR); - - return ret; -} - FIXTURE_SETUP(current_nsset) { int i, proc_fd, ret; @@ -229,7 +190,7 @@ FIXTURE_SETUP(current_nsset) _exit(EXIT_SUCCESS); } - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED | WNOWAIT), 0); self->pidfd = sys_pidfd_open(self->pid, 0); EXPECT_GE(self->pidfd, 0) { @@ -432,9 +393,9 @@ FIXTURE_TEARDOWN(current_nsset) EXPECT_EQ(0, close(self->child_pidfd1)); if (self->child_pidfd2 >= 0) EXPECT_EQ(0, close(self->child_pidfd2)); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0); - ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0); + ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0); } static int preserve_ns(const int pid, const char *ns) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 0dcb8365ddc3..1e2d49751cde 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -26,22 +26,11 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif -static pid_t sys_clone3(struct clone_args *args) -{ - return syscall(__NR_clone3, args, sizeof(struct clone_args)); -} - -static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, - struct rusage *ru) -{ - return syscall(__NR_waitid, which, pid, info, options, ru); -} - TEST(wait_simple) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -55,7 +44,7 @@ TEST(wait_simple) pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; @@ -63,18 +52,18 @@ TEST(wait_simple) pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(pidfd, 0); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_NE(pid, 0); EXPECT_EQ(close(pidfd), 0); pidfd = -1; - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); - pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_GE(pid, 0); ASSERT_EQ(WIFEXITED(info.si_status), true); ASSERT_EQ(WEXITSTATUS(info.si_status), 0); @@ -89,7 +78,7 @@ TEST(wait_states) { int pidfd = -1; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .pidfd = ptr_to_u64(&pidfd), .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, @@ -102,7 +91,7 @@ TEST(wait_states) }; ASSERT_EQ(pipe(pfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -117,28 +106,28 @@ TEST(wait_states) } close(pfd[0]); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED), 0); ASSERT_EQ(write(pfd[1], "C", 1), 1); close(pfd[1]); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_CONTINUED); ASSERT_EQ(info.si_pid, parent_tid); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_KILLED); ASSERT_EQ(info.si_pid, parent_tid); @@ -151,7 +140,7 @@ TEST(wait_nonblock) int pidfd; unsigned int flags = 0; pid_t parent_tid = -1; - struct clone_args args = { + struct __clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), .flags = CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, @@ -173,12 +162,12 @@ TEST(wait_nonblock) SKIP(return, "Skipping PIDFD_NONBLOCK test"); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, ECHILD); EXPECT_EQ(close(pidfd), 0); - pid = sys_clone3(&args); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); if (pid == 0) { @@ -201,7 +190,7 @@ TEST(wait_nonblock) * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when * child processes exist but none have exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED); ASSERT_LT(ret, 0); ASSERT_EQ(errno, EAGAIN); @@ -210,19 +199,19 @@ TEST(wait_nonblock) * WNOHANG raised explicitly when child processes exist but none have * exited. */ - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG); ASSERT_EQ(ret, 0); ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_STOPPED); ASSERT_EQ(info.si_pid, parent_tid); ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); - ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0); ASSERT_EQ(info.si_signo, SIGCHLD); ASSERT_EQ(info.si_code, CLD_EXITED); ASSERT_EQ(info.si_pid, parent_tid); diff --git a/tools/testing/selftests/powerpc/alignment/settings b/tools/testing/selftests/powerpc/alignment/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c index 580fcac0a09f..b71ef8a493ed 100644 --- a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c +++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c @@ -20,7 +20,7 @@ static int test_gettimeofday(void) gettimeofday(&tv_end, NULL); } - timersub(&tv_start, &tv_end, &tv_diff); + timersub(&tv_end, &tv_start, &tv_diff); printf("time = %.6f\n", tv_diff.tv_sec + (tv_diff.tv_usec) * 1e-6); diff --git a/tools/testing/selftests/powerpc/cache_shape/settings b/tools/testing/selftests/powerpc/cache_shape/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/copyloops/settings b/tools/testing/selftests/powerpc/copyloops/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dexcr/settings b/tools/testing/selftests/powerpc/dexcr/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 51729d9a7111..3a0129467de6 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -35,10 +35,18 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + #define PKEY_BITS_PER_PKEY 2 #define NR_PKEYS 32 #define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + inline unsigned long pkeyreg_get(void) { return mfspr(SPRN_AMR); diff --git a/tools/testing/selftests/powerpc/lib/settings b/tools/testing/selftests/powerpc/lib/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/lib/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/math/settings b/tools/testing/selftests/powerpc/math/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mce/settings b/tools/testing/selftests/powerpc/mce/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/settings b/tools/testing/selftests/powerpc/mm/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c index ed9143990888..9c0d343d7137 100644 --- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -175,7 +175,7 @@ static int test(void) page_size = getpagesize(); getrlimit(RLIMIT_STACK, &rlimit); - printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + printf("Stack rlimit is 0x%llx\n", (unsigned long long)rlimit.rlim_cur); printf("Testing loads ...\n"); test_one_type(LOAD, page_size, rlimit.rlim_cur); diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 3ae77ba93208..8cf9fd5fed1c 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -211,8 +211,8 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at %p\n", - file_name, filesize, fileblock); + printf("allocated %s for 0x%llx bytes at %p\n", + file_name, (long long)filesize, fileblock); printf("testing file map...\n"); diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 48344a74b212..35f0098399cc 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -313,16 +313,16 @@ static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) fclose(f); - if (nr_anamolies == 0) { - remove(path); - return; - } - sprintf(logfile, logfilename, tid); strcpy(path, logdir); strcat(path, separator); strcat(path, logfile); + if (nr_anamolies == 0) { + remove(path); + return; + } + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", tid, nr_anamolies, path); } diff --git a/tools/testing/selftests/powerpc/nx-gzip/settings b/tools/testing/selftests/powerpc/nx-gzip/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_attributes/settings b/tools/testing/selftests/powerpc/papr_attributes/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_sysparm/settings b/tools/testing/selftests/powerpc/papr_sysparm/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_vpd/settings b/tools/testing/selftests/powerpc/papr_vpd/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2070a1e2b3a5..d8dd9a9c6c1b 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -144,9 +144,6 @@ static int test_body(void) /* Run for 16Bi instructions */ FAIL_IF(do_count_loop(events, 16000000000, overhead, true)); - /* Run for 64Bi instructions */ - FAIL_IF(do_count_loop(events, 64000000000, overhead, true)); - event_close(&events[0]); event_close(&events[1]); diff --git a/tools/testing/selftests/powerpc/pmu/settings b/tools/testing/selftests/powerpc/pmu/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/primitives/settings b/tools/testing/selftests/powerpc/primitives/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6da4cb30cd6..f061434af452 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -16,26 +16,7 @@ #include <unistd.h> #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif - -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) +#include "pkeys.h" #define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */ @@ -61,16 +42,6 @@ struct shared_info { time_t core_time; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int increase_core_file_limit(void) { struct rlimit rlim; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index d89474377f11..fc633014424f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -7,26 +7,7 @@ */ #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif - -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) +#include "pkeys.h" static const char user_read[] = "[User Read (Running)]"; static const char user_write[] = "[User Write (Running)]"; @@ -61,11 +42,6 @@ struct shared_info { unsigned long invalid_uamor; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - static int child(struct shared_info *info) { unsigned long reg; diff --git a/tools/testing/selftests/powerpc/ptrace/settings b/tools/testing/selftests/powerpc/ptrace/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/scripts/settings b/tools/testing/selftests/powerpc/scripts/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/scripts/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index f43aa4b77fba..9a4612e2e953 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -36,8 +36,7 @@ fi tainted=$(cat /proc/sys/kernel/tainted) if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel already tainted!" >&2 - exit 1 + echo "Warning: kernel already tainted! ($tainted)" >&2 fi mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" @@ -68,9 +67,10 @@ fi echo "Waiting for timeout ..." wait +orig_tainted=$tainted tainted=$(cat /proc/sys/kernel/tainted) -if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel became tainted!" >&2 +if [[ "$tainted" != "$orig_tainted" ]]; then + echo "Error: kernel newly tainted, before ($orig_tainted) after ($tainted)" >&2 exit 1 fi diff --git a/tools/testing/selftests/powerpc/security/settings b/tools/testing/selftests/powerpc/security/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index 08f9afe3b95c..c101b1391696 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -321,5 +321,5 @@ int main(int argc, char **argv) if (!args) args = ARG_COMPLETE; - test_harness(signal_fuzzer, "signal_fuzzer"); + return test_harness(signal_fuzzer, "signal_fuzzer"); } diff --git a/tools/testing/selftests/powerpc/stringloops/settings b/tools/testing/selftests/powerpc/stringloops/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/switch_endian/settings b/tools/testing/selftests/powerpc/switch_endian/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/syscalls/settings b/tools/testing/selftests/powerpc/syscalls/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 421cb082f6be..0a4bc479ae39 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -176,5 +176,5 @@ int tm_signal_context_force_tm(void) int main(int argc, char **argv) { - test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); + return test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 06b801906f27..968864b052ec 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -46,6 +46,5 @@ int tm_signal_sigreturn_nt(void) int main(int argc, char **argv) { - test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); + return test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); } - diff --git a/tools/testing/selftests/powerpc/vphn/settings b/tools/testing/selftests/powerpc/vphn/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c index 81d3069ffb84..f348f54914a9 100644 --- a/tools/testing/selftests/powerpc/vphn/test-vphn.c +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -275,7 +275,7 @@ static struct test { } }, { - /* Parse a 32-bit value split accross two consecutives 64-bit + /* Parse a 32-bit value split across two consecutives 64-bit * input values. */ "vphn: 16-bit value followed by 2 x 32-bit values", diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 134cdef5a6e0..48a8052d5dae 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -181,10 +181,11 @@ done # Function to check for presence of a file on the specified system. # Complain if the system cannot be reached, and retry after a wait. -# Currently just waits forever if a machine disappears. +# Currently just waits 15 minutes if a machine disappears. # # Usage: checkremotefile system pathname checkremotefile () { + local nsshfails=0 local ret local sleeptime=60 @@ -195,6 +196,11 @@ checkremotefile () { if test "$ret" -eq 255 then echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" + nsshfails=$((nsshfails+1)) + if ((nsshfails > 15)) + then + return 255 + fi elif test "$ret" -eq 0 then return 0 @@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log" for i in $systems do echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log" - while checkremotefile "$i" "$resdir/$ds/remote.run" + while : do + checkremotefile "$i" "$resdir/$ds/remote.run" + ret=$? + if test "$ret" -eq 1 + then + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" + ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) + break; + fi + if test "$ret" -eq 255 + then + echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log" + break; + fi sleep 30 done - echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" - ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 8e50bfd4b710..90318591dae2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -5,3 +5,4 @@ rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs rcutree.use_softirq=0 +rcutorture.preempt_duration=10 diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 7ce03d832b64..099b8c1f46f8 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn +RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/abi/.gitignore b/tools/testing/selftests/riscv/abi/.gitignore new file mode 100644 index 000000000000..b38358f91c4d --- /dev/null +++ b/tools/testing/selftests/riscv/abi/.gitignore @@ -0,0 +1 @@ +pointer_masking diff --git a/tools/testing/selftests/riscv/abi/Makefile b/tools/testing/selftests/riscv/abi/Makefile new file mode 100644 index 000000000000..ed82ff9c664e --- /dev/null +++ b/tools/testing/selftests/riscv/abi/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I$(top_srcdir)/tools/include + +TEST_GEN_PROGS := pointer_masking + +include ../../lib.mk + +$(OUTPUT)/pointer_masking: pointer_masking.c + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/abi/pointer_masking.c b/tools/testing/selftests/riscv/abi/pointer_masking.c new file mode 100644 index 000000000000..059d2e87eb1f --- /dev/null +++ b/tools/testing/selftests/riscv/abi/pointer_masking.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <errno.h> +#include <fcntl.h> +#include <setjmp.h> +#include <signal.h> +#include <stdbool.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../../kselftest.h" + +#ifndef PR_PMLEN_SHIFT +#define PR_PMLEN_SHIFT 24 +#endif +#ifndef PR_PMLEN_MASK +#define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) +#endif + +static int dev_zero; + +static int pipefd[2]; + +static sigjmp_buf jmpbuf; + +static void sigsegv_handler(int sig) +{ + siglongjmp(jmpbuf, 1); +} + +static int min_pmlen; +static int max_pmlen; + +static inline bool valid_pmlen(int pmlen) +{ + return pmlen == 0 || pmlen == 7 || pmlen == 16; +} + +static void test_pmlen(void) +{ + ksft_print_msg("Testing available PMLEN values\n"); + + for (int request = 0; request <= 16; request++) { + int pmlen, ret; + + ret = prctl(PR_SET_TAGGED_ADDR_CTRL, request << PR_PMLEN_SHIFT, 0, 0, 0); + if (ret) + goto pr_set_error; + + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + ksft_test_result(ret >= 0, "PMLEN=%d PR_GET_TAGGED_ADDR_CTRL\n", request); + if (ret < 0) + goto pr_get_error; + + pmlen = (ret & PR_PMLEN_MASK) >> PR_PMLEN_SHIFT; + ksft_test_result(pmlen >= request, "PMLEN=%d constraint\n", request); + ksft_test_result(valid_pmlen(pmlen), "PMLEN=%d validity\n", request); + + if (min_pmlen == 0) + min_pmlen = pmlen; + if (max_pmlen < pmlen) + max_pmlen = pmlen; + + continue; + +pr_set_error: + ksft_test_result_skip("PMLEN=%d PR_GET_TAGGED_ADDR_CTRL\n", request); +pr_get_error: + ksft_test_result_skip("PMLEN=%d constraint\n", request); + ksft_test_result_skip("PMLEN=%d validity\n", request); + } + + if (max_pmlen == 0) + ksft_exit_fail_msg("Failed to enable pointer masking\n"); +} + +static int set_tagged_addr_ctrl(int pmlen, bool tagged_addr_abi) +{ + int arg, ret; + + arg = pmlen << PR_PMLEN_SHIFT | tagged_addr_abi; + ret = prctl(PR_SET_TAGGED_ADDR_CTRL, arg, 0, 0, 0); + if (!ret) { + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0); + if (ret == arg) + return 0; + } + + return ret < 0 ? -errno : -ENODATA; +} + +static void test_dereference_pmlen(int pmlen) +{ + static volatile int i; + volatile int *p; + int ret; + + ret = set_tagged_addr_ctrl(pmlen, false); + if (ret) + return ksft_test_result_error("PMLEN=%d setup (%d)\n", pmlen, ret); + + i = pmlen; + + if (pmlen) { + p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen)); + + /* These dereferences should succeed. */ + if (sigsetjmp(jmpbuf, 1)) + return ksft_test_result_fail("PMLEN=%d valid tag\n", pmlen); + if (*p != pmlen) + return ksft_test_result_fail("PMLEN=%d bad value\n", pmlen); + ++*p; + } + + p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen - 1)); + + /* These dereferences should raise SIGSEGV. */ + if (sigsetjmp(jmpbuf, 1)) + return ksft_test_result_pass("PMLEN=%d dereference\n", pmlen); + ++*p; + ksft_test_result_fail("PMLEN=%d invalid tag\n", pmlen); +} + +static void test_dereference(void) +{ + ksft_print_msg("Testing userspace pointer dereference\n"); + + signal(SIGSEGV, sigsegv_handler); + + test_dereference_pmlen(0); + test_dereference_pmlen(min_pmlen); + test_dereference_pmlen(max_pmlen); + + signal(SIGSEGV, SIG_DFL); +} + +static void execve_child_sigsegv_handler(int sig) +{ + exit(42); +} + +static int execve_child(void) +{ + static volatile int i; + volatile int *p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - 7)); + + signal(SIGSEGV, execve_child_sigsegv_handler); + + /* This dereference should raise SIGSEGV. */ + return *p; +} + +static void test_fork_exec(void) +{ + int ret, status; + + ksft_print_msg("Testing fork/exec behavior\n"); + + ret = set_tagged_addr_ctrl(min_pmlen, false); + if (ret) + return ksft_test_result_error("setup (%d)\n", ret); + + if (fork()) { + wait(&status); + ksft_test_result(WIFEXITED(status) && WEXITSTATUS(status) == 42, + "dereference after fork\n"); + } else { + static volatile int i = 42; + volatile int *p; + + p = (volatile int *)((uintptr_t)&i | 1UL << (__riscv_xlen - min_pmlen)); + + /* This dereference should succeed. */ + exit(*p); + } + + if (fork()) { + wait(&status); + ksft_test_result(WIFEXITED(status) && WEXITSTATUS(status) == 42, + "dereference after fork+exec\n"); + } else { + /* Will call execve_child(). */ + execve("/proc/self/exe", (char *const []) { "", NULL }, NULL); + } +} + +static bool pwrite_wrapper(int fd, void *buf, size_t count, const char *msg) +{ + int ret = pwrite(fd, buf, count, 0); + + if (ret != count) { + ksft_perror(msg); + return false; + } + return true; +} + +static void test_tagged_addr_abi_sysctl(void) +{ + char *err_pwrite_msg = "failed to write to /proc/sys/abi/tagged_addr_disabled\n"; + char value; + int fd; + + ksft_print_msg("Testing tagged address ABI sysctl\n"); + + fd = open("/proc/sys/abi/tagged_addr_disabled", O_WRONLY); + if (fd < 0) { + ksft_test_result_skip("failed to open sysctl file\n"); + ksft_test_result_skip("failed to open sysctl file\n"); + return; + } + + value = '1'; + if (!pwrite_wrapper(fd, &value, 1, "write '1'")) + ksft_test_result_fail(err_pwrite_msg); + else + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL, + "sysctl disabled\n"); + + value = '0'; + if (!pwrite_wrapper(fd, &value, 1, "write '0'")) + ksft_test_result_fail(err_pwrite_msg); + else + ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0, + "sysctl enabled\n"); + + set_tagged_addr_ctrl(0, false); + + close(fd); +} + +static void test_tagged_addr_abi_pmlen(int pmlen) +{ + int i, *p, ret; + + i = ~pmlen; + + if (pmlen) { + p = (int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen)); + + ret = set_tagged_addr_ctrl(pmlen, false); + if (ret) + return ksft_test_result_error("PMLEN=%d ABI disabled setup (%d)\n", + pmlen, ret); + + ret = write(pipefd[1], p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d ABI disabled write\n", pmlen); + + ret = read(dev_zero, p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d ABI disabled read\n", pmlen); + + if (i != ~pmlen) + return ksft_test_result_fail("PMLEN=%d ABI disabled value\n", pmlen); + + ret = set_tagged_addr_ctrl(pmlen, true); + if (ret) + return ksft_test_result_error("PMLEN=%d ABI enabled setup (%d)\n", + pmlen, ret); + + ret = write(pipefd[1], p, sizeof(*p)); + if (ret != sizeof(*p)) + return ksft_test_result_fail("PMLEN=%d ABI enabled write\n", pmlen); + + ret = read(dev_zero, p, sizeof(*p)); + if (ret != sizeof(*p)) + return ksft_test_result_fail("PMLEN=%d ABI enabled read\n", pmlen); + + if (i) + return ksft_test_result_fail("PMLEN=%d ABI enabled value\n", pmlen); + + i = ~pmlen; + } else { + /* The tagged address ABI cannot be enabled when PMLEN == 0. */ + ret = set_tagged_addr_ctrl(pmlen, true); + if (ret != -EINVAL) + return ksft_test_result_error("PMLEN=%d ABI setup (%d)\n", + pmlen, ret); + } + + p = (int *)((uintptr_t)&i | 1UL << (__riscv_xlen - pmlen - 1)); + + ret = write(pipefd[1], p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d invalid tag write (%d)\n", pmlen, errno); + + ret = read(dev_zero, p, sizeof(*p)); + if (ret >= 0 || errno != EFAULT) + return ksft_test_result_fail("PMLEN=%d invalid tag read\n", pmlen); + + if (i != ~pmlen) + return ksft_test_result_fail("PMLEN=%d invalid tag value\n", pmlen); + + ksft_test_result_pass("PMLEN=%d tagged address ABI\n", pmlen); +} + +static void test_tagged_addr_abi(void) +{ + ksft_print_msg("Testing tagged address ABI\n"); + + test_tagged_addr_abi_pmlen(0); + test_tagged_addr_abi_pmlen(min_pmlen); + test_tagged_addr_abi_pmlen(max_pmlen); +} + +static struct test_info { + unsigned int nr_tests; + void (*test_fn)(void); +} tests[] = { + { .nr_tests = 17 * 3, test_pmlen }, + { .nr_tests = 3, test_dereference }, + { .nr_tests = 2, test_fork_exec }, + { .nr_tests = 2, test_tagged_addr_abi_sysctl }, + { .nr_tests = 3, test_tagged_addr_abi }, +}; + +int main(int argc, char **argv) +{ + unsigned int plan = 0; + int ret; + + /* Check if this is the child process after execve(). */ + if (!argv[0][0]) + return execve_child(); + + dev_zero = open("/dev/zero", O_RDWR); + if (dev_zero < 0) + return 1; + + /* Write to a pipe so the kernel must dereference the buffer pointer. */ + ret = pipe(pipefd); + if (ret) + return 1; + + ksft_print_header(); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) + plan += tests[i].nr_tests; + + ksft_set_plan(plan); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) + tests[i].test_fn(); + + ksft_finished(); +} diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c index 1dd94197da30..6174ffe016dc 100644 --- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c +++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c @@ -25,6 +25,8 @@ int main(void) unsigned long vl; char *datap, *tmp; + ksft_set_plan(1); + datap = malloc(MAX_VSIZE); if (!datap) { ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE); @@ -63,6 +65,8 @@ int main(void) } free(datap); + + ksft_test_result_pass("tests for v_initval_nolibc pass\n"); ksft_exit_pass(); return 0; } diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 895177f6bf4c..40b3bffcbb40 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -76,6 +76,8 @@ int main(void) long flag, expected; long rc; + ksft_set_plan(1); + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); if (rc < 0) { diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c index 37d9bf6fb745..6f4c3f5a1c5d 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c @@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p, * If we dispatch to a bogus DSQ that will fall back to the * builtin global DSQ, we fail gracefully. */ - scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL, + scx_bpf_dsq_insert_vtime(p, 0xcafef00d, SCX_SLICE_DFL, p->scx.dsq_vtime, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c index dffc97d9cdf1..e4a55027778f 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c @@ -17,8 +17,8 @@ s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p, if (cpu >= 0) { /* Shouldn't be allowed to vtime dispatch to a builtin DSQ. */ - scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, - p->scx.dsq_vtime, 0); + scx_bpf_dsq_insert_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, + p->scx.dsq_vtime, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index 6a7db1502c29..fbda6bf54671 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,9 +43,12 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - target = bpf_get_prandom_u32() % nr_cpus; + if (p->nr_cpus_allowed == nr_cpus) + target = bpf_get_prandom_u32() % nr_cpus; + else + target = scx_bpf_task_cpu(p); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); bpf_task_release(p); } diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 472851b56854..0ff27e57fe43 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -34,9 +34,10 @@ static enum scx_test_status run(void *ctx) /* Just sleeping is fine, plenty of scheduling events happening */ sleep(1); - SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR)); bpf_link__destroy(link); + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG)); + return SCX_TEST_PASS; } @@ -50,7 +51,7 @@ static void cleanup(void *ctx) struct scx_test dsp_local_on = { .name = "dsp_local_on", .description = "Verify we can directly dispatch tasks to a local DSQs " - "from osp.dispatch()", + "from ops.dispatch()", .setup = setup, .run = run, .cleanup = cleanup, diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c index 1efb50d61040..a7cf868d5e31 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c @@ -31,7 +31,7 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, /* Can only call from ops.select_cpu() */ scx_bpf_select_cpu_dfl(p, 0, 0, &found); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } SEC(".struct_ops.link") diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c index d75d4faf07f6..4bc36182d3ff 100644 --- a/tools/testing/selftests/sched_ext/exit.bpf.c +++ b/tools/testing/selftests/sched_ext/exit.bpf.c @@ -33,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags) if (exit_point == EXIT_ENQUEUE) EXIT_CLEANLY(); - scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) @@ -41,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) if (exit_point == EXIT_DISPATCH) EXIT_CLEANLY(); - scx_bpf_consume(DSQ_ID); + scx_bpf_dsq_move_to_local(DSQ_ID); } void BPF_STRUCT_OPS(exit_enable, struct task_struct *p) diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 4d4cd8d966db..430f5e13bf55 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -12,6 +12,8 @@ char _license[] SEC("license") = "GPL"; +#define DSQ_ID 0 + s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) { - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) @@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_consume(SCX_DSQ_GLOBAL); + scx_bpf_dsq_move_to_local(DSQ_ID); } void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) @@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { - return 0; + return scx_bpf_create_dsq(DSQ_ID, -1); } void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c index f171ac470970..13d0f5be788d 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c @@ -30,7 +30,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, } scx_bpf_put_idle_cpumask(idle_mask); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); } SEC(".struct_ops.link") diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c index 9efdbb7da928..815f1d5d61ac 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c @@ -67,7 +67,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p, saw_local = true; } - scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags); } s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c index 59bfc4f36167..4bb99699e920 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c @@ -29,7 +29,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p, cpu = prev_cpu; dispatch: - scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0); return cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c index 3bbd5fcdfb18..2a75de11b2cf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c @@ -18,7 +18,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p s32 prev_cpu, u64 wake_flags) { /* Dispatching to a random DSQ should fail. */ - scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, 0xcafef00d, SCX_SLICE_DFL, 0); return prev_cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c index 0fda57fe0ecf..99d075695c97 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c @@ -18,8 +18,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p s32 prev_cpu, u64 wake_flags) { /* Dispatching twice in a row is disallowed. */ - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); return prev_cpu; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c index e6c67bcf5e6e..bfcb96cd4954 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c @@ -2,8 +2,8 @@ /* * A scheduler that validates that enqueue flags are properly stored and * applied at dispatch time when a task is directly dispatched from - * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and - * making the test a very basic vtime scheduler. + * ops.select_cpu(). We validate this by using scx_bpf_dsq_insert_vtime(), + * and making the test a very basic vtime scheduler. * * Copyright (c) 2024 Meta Platforms, Inc. and affiliates. * Copyright (c) 2024 David Vernet <dvernet@meta.com> @@ -47,13 +47,13 @@ s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p, cpu = prev_cpu; scx_bpf_test_and_clear_cpu_idle(cpu); ddsp: - scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); + scx_bpf_dsq_insert_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0); return cpu; } void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p) { - if (scx_bpf_consume(VTIME_DSQ)) + if (scx_bpf_dsq_move_to_local(VTIME_DSQ)) consumed = true; } diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py new file mode 100755 index 000000000000..0f44a6199495 --- /dev/null +++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Script that checks that SFQ rejects a limit of 1 at the kernel +# level. We can't use iproute2's tc because it does not accept a limit +# of 1. + +import sys +import os + +from pyroute2 import IPRoute +from pyroute2.netlink.exceptions import NetlinkError + +ip = IPRoute() +ifidx = ip.link_lookup(ifname=sys.argv[1]) + +try: + ip.tc('add', 'sfq', ifidx, limit=1) + sys.exit(1) +except NetlinkError: + sys.exit(0) diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 996448afe31b..91d120548bf5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -78,10 +78,10 @@ "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow", - "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass", + "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 16d51936b385..50e8d72781cb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -208,5 +208,25 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4d6f", + "name": "Check that limit of 1 is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [ + ] } ] diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 7dd5668ea8a6..28f35620c499 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -222,8 +222,7 @@ void *vdso_sym(const char *version, const char *name) ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC && - ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK) diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 17263696b5d8..66dbb362385f 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -96,10 +96,13 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, p = node; } else { pthread_mutex_unlock(&cachep->lock); - if (cachep->align) - posix_memalign(&p, cachep->align, cachep->size); - else + if (cachep->align) { + if (posix_memalign(&p, cachep->align, cachep->size) < 0) + return NULL; + } else { p = malloc(cachep->size); + } + if (cachep->ctor) cachep->ctor(p); else if (gfp & __GFP_ZERO) @@ -195,8 +198,9 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } if (cachep->align) { - posix_memalign(&p[i], cachep->align, - cachep->size); + if (posix_memalign(&p[i], cachep->align, + cachep->size) < 0) + break; } else { p[i] = malloc(cachep->size); if (!p[i]) diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index 06c89bdcc515..f67d47d32857 100644 --- a/tools/testing/shared/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h @@ -2,6 +2,6 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index a6bc51d0b0bf..923ee2492256 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -69,6 +69,7 @@ generated/bit-length.h: FORCE @if ! grep -qws CONFIG_$(LONG_BIT)BIT generated/bit-length.h; then \ echo "Generating $@"; \ echo "#define CONFIG_$(LONG_BIT)BIT 1" > $@; \ + echo "#define CONFIG_PHYS_ADDR_T_$(LONG_BIT)BIT 1" >> $@; \ fi FORCE: ; diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h index e01f66f98982..3e1b6adc027b 100644 --- a/tools/testing/vma/linux/atomic.h +++ b/tools/testing/vma/linux/atomic.h @@ -6,7 +6,7 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index b33b47342d41..9bcf1736bf18 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -4,6 +4,8 @@ #include <stdio.h> #include <stdlib.h> +#include "generated/bit-length.h" + #include "maple-shared.h" #include "vma_internal.h" @@ -87,7 +89,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, * begun. Linking to the tree will have caused this to be incremented, * which means we will get a false positive otherwise. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return vma; } @@ -212,7 +214,7 @@ static bool vma_write_started(struct vm_area_struct *vma) int seq = vma->vm_lock_seq; /* We reset after each check. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; /* The vma_start_write() stub simply increments this value. */ return seq > -1; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index c5b9da034511..1d9fc97b8e80 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -44,7 +44,9 @@ #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 #define VM_DONTEXPAND 0x00040000 +#define VM_LOCKONFAULT 0x00080000 #define VM_ACCOUNT 0x00100000 +#define VM_NORESERVE 0x00200000 #define VM_MIXEDMAP 0x10000000 #define VM_STACK VM_GROWSDOWN #define VM_SHADOW_STACK VM_NONE @@ -53,6 +55,14 @@ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask represents all the VMA flag bits used by mlock */ +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) + +#ifdef CONFIG_64BIT +/* VM is sealed, in vm_flags */ +#define VM_SEALED _BITUL(63) +#endif + #define FIRST_USER_ADDRESS 0UL #define USER_PGTABLES_CEILING 0UL @@ -231,7 +241,7 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; + unsigned int vm_lock_seq; struct vma_lock *vm_lock; #endif @@ -406,7 +416,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma) return false; init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return true; } @@ -698,8 +708,9 @@ static inline void tlb_finish_mmu(struct mmu_gather *) { } -static inline void get_file(struct file *) +static inline struct file *get_file(struct file *f) { + return f; } static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) @@ -920,4 +931,106 @@ static inline bool signal_pending(void *) return false; } +static inline bool is_file_hugepages(struct file *) +{ + return false; +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *, long) +{ + return true; +} + +static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long) +{ + return true; +} + +static inline void vm_flags_init(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma->__vm_flags = flags; +} + +static inline void vm_flags_set(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma->__vm_flags |= flags; +} + +static inline void vm_flags_clear(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma->__vm_flags &= ~flags; +} + +static inline int call_mmap(struct file *, struct vm_area_struct *) +{ + return 0; +} + +static inline int shmem_zero_setup(struct vm_area_struct *) +{ + return 0; +} + +static inline void vma_set_anonymous(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + +static inline void ksm_add_vma(struct vm_area_struct *) +{ +} + +static inline void perf_event_mmap(struct vm_area_struct *) +{ +} + +static inline bool vma_is_dax(struct vm_area_struct *) +{ + return false; +} + +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *) +{ + return NULL; +} + +bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); + +/* Update vma->vm_page_prot to reflect vma->vm_flags. */ +static inline void vma_set_page_prot(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + pgprot_t vm_page_prot; + + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); + + if (vma_wants_writenotify(vma, vm_page_prot)) { + vm_flags &= ~VM_SHARED; + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); + } + /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ + WRITE_ONCE(vma->vm_page_prot, vm_page_prot); +} + +static inline bool arch_validate_flags(unsigned long) +{ + return true; +} + +static inline void vma_close(struct vm_area_struct *) +{ +} + +static inline int mmap_file(struct file *, struct vm_area_struct *) +{ + return 0; +} + #endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 84ee217ba8ee..680ce666ceb5 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows: --control-port=1234 \ --peer-cid=3 +Some tests are designed to produce kernel memory leaks. Leaks detection, +however, is deferred to Kernel Memory Leak Detector. It is recommended to enable +kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test +suite run, e.g. + + # echo clear > /sys/kernel/debug/kmemleak + # $TEST_BINARY ... + # echo "wait for any grace periods" && sleep 2 + # echo scan > /sys/kernel/debug/kmemleak + # echo "wait for kmemleak" && sleep 5 + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak + +For more information see Documentation/dev-tools/kmemleak.rst. + vsock_perf utility ------------------- 'vsock_perf' is a simple tool to measure vsock performance. It works in diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c index d2deb4b15b94..0066e0324d35 100644 --- a/tools/testing/vsock/control.c +++ b/tools/testing/vsock/control.c @@ -27,6 +27,7 @@ #include "timeout.h" #include "control.h" +#include "util.h" static int control_fd = -1; @@ -50,7 +51,6 @@ void control_init(const char *control_host, for (ai = result; ai; ai = ai->ai_next) { int fd; - int val = 1; fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); if (fd < 0) @@ -65,11 +65,8 @@ void control_init(const char *control_host, break; } - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &val, sizeof(val)) < 0) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_REUSEADDR, 1, + "setsockopt SO_REUSEADDR"); if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0) goto next; diff --git a/tools/testing/vsock/msg_zerocopy_common.c b/tools/testing/vsock/msg_zerocopy_common.c index 5a4bdf7b5132..8622e5a0f8b7 100644 --- a/tools/testing/vsock/msg_zerocopy_common.c +++ b/tools/testing/vsock/msg_zerocopy_common.c @@ -14,16 +14,6 @@ #include "msg_zerocopy_common.h" -void enable_so_zerocopy(int fd) -{ - int val = 1; - - if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { - perror("setsockopt"); - exit(EXIT_FAILURE); - } -} - void vsock_recv_completion(int fd, const bool *zerocopied) { struct sock_extended_err *serr; diff --git a/tools/testing/vsock/msg_zerocopy_common.h b/tools/testing/vsock/msg_zerocopy_common.h index 3763c5ccedb9..ad14139e93ca 100644 --- a/tools/testing/vsock/msg_zerocopy_common.h +++ b/tools/testing/vsock/msg_zerocopy_common.h @@ -12,7 +12,6 @@ #define VSOCK_RECVERR 1 #endif -void enable_so_zerocopy(int fd); void vsock_recv_completion(int fd, const bool *zerocopied); #endif /* MSG_ZEROCOPY_COMMON_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index a3d448a075e3..7058dc614c25 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -401,7 +401,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) */ void send_byte(int fd, int expected_ret, int flags) { - const uint8_t byte = 'A'; + static const uint8_t byte = 'A'; send_buf(fd, &byte, sizeof(byte), flags, expected_ret); } @@ -420,7 +420,7 @@ void recv_byte(int fd, int expected_ret, int flags) recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); if (byte != 'A') { - fprintf(stderr, "unexpected byte read %c\n", byte); + fprintf(stderr, "unexpected byte read 0x%02x\n", byte); exit(EXIT_FAILURE); } } @@ -486,8 +486,7 @@ void list_tests(const struct test_case *test_cases) exit(EXIT_FAILURE); } -void skip_test(struct test_case *test_cases, size_t test_cases_len, - const char *test_id_str) +static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len) { unsigned long test_id; char *endptr = NULL; @@ -505,9 +504,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, exit(EXIT_FAILURE); } + return test_id; +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id = parse_test_id(test_id_str, test_cases_len); test_cases[test_id].skip = true; } +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + static bool skip_all = true; + unsigned long test_id; + + if (skip_all) { + unsigned long i; + + for (i = 0; i < test_cases_len; ++i) + test_cases[i].skip = true; + + skip_all = false; + } + + test_id = parse_test_id(test_id_str, test_cases_len); + test_cases[test_id].skip = false; +} + unsigned long hash_djb2(const void *data, size_t len) { unsigned long hash = 5381; @@ -651,3 +676,145 @@ void free_test_iovec(const struct iovec *test_iovec, free(iovec); } + +/* Set "unsigned long long" socket option and check that it's indeed set */ +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg) +{ + unsigned long long chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %llu got %llu\n", val, + chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %llu\n", errmsg, val); + exit(EXIT_FAILURE); +; +} + +/* Set "int" socket option and check that it's indeed set */ +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg) +{ + int chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + chkval = ~val; /* just make storage != val */ + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (chkval != val) { + fprintf(stderr, "value mismatch: set %d got %d\n", val, chkval); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %d\n", errmsg, val); + exit(EXIT_FAILURE); +} + +static void mem_invert(unsigned char *mem, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + mem[i] = ~mem[i]; +} + +/* Set "timeval" socket option and check that it's indeed set */ +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg) +{ + struct timeval chkval; + socklen_t chklen; + int err; + + err = setsockopt(fd, level, optname, &val, sizeof(val)); + if (err) { + fprintf(stderr, "setsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + /* just make storage != val */ + chkval = val; + mem_invert((unsigned char *)&chkval, sizeof(chkval)); + chklen = sizeof(chkval); + + err = getsockopt(fd, level, optname, &chkval, &chklen); + if (err) { + fprintf(stderr, "getsockopt err: %s (%d)\n", + strerror(errno), errno); + goto fail; + } + + if (chklen != sizeof(chkval)) { + fprintf(stderr, "size mismatch: set %zu got %d\n", sizeof(val), + chklen); + goto fail; + } + + if (memcmp(&chkval, &val, sizeof(val)) != 0) { + fprintf(stderr, "value mismatch: set %ld:%ld got %ld:%ld\n", + val.tv_sec, val.tv_usec, chkval.tv_sec, chkval.tv_usec); + goto fail; + } + return; +fail: + fprintf(stderr, "%s val %ld:%ld\n", errmsg, val.tv_sec, val.tv_usec); + exit(EXIT_FAILURE); +} + +void enable_so_zerocopy_check(int fd) +{ + setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1, + "setsockopt SO_ZEROCOPY"); +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index fff22d4a14c0..e62f46b2b92a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -62,10 +62,19 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); size_t iovec_bytes(const struct iovec *iov, size_t iovnum); unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); struct iovec *alloc_test_iovec(const struct iovec *test_iovec, int iovnum); void free_test_iovec(const struct iovec *test_iovec, struct iovec *iovec, int iovnum); +void setsockopt_ull_check(int fd, int level, int optname, + unsigned long long val, char const *errmsg); +void setsockopt_int_check(int fd, int level, int optname, int val, + char const *errmsg); +void setsockopt_timeval_check(int fd, int level, int optname, + struct timeval val, char const *errmsg); +void enable_so_zerocopy_check(int fd); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_perf.c b/tools/testing/vsock/vsock_perf.c index 4e8578f815e0..75971ac708c9 100644 --- a/tools/testing/vsock/vsock_perf.c +++ b/tools/testing/vsock/vsock_perf.c @@ -33,7 +33,7 @@ static unsigned int port = DEFAULT_PORT; static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES; -static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; +static unsigned long long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES; static bool zerocopy; static void error(const char *s) @@ -133,7 +133,7 @@ static float get_gbps(unsigned long bits, time_t ns_delta) ((float)ns_delta / NSEC_PER_SEC); } -static void run_receiver(unsigned long rcvlowat_bytes) +static void run_receiver(int rcvlowat_bytes) { unsigned int read_cnt; time_t rx_begin_ns; @@ -162,8 +162,8 @@ static void run_receiver(unsigned long rcvlowat_bytes) printf("Run as receiver\n"); printf("Listen port %u\n", port); printf("RX buffer %lu bytes\n", buf_size_bytes); - printf("vsock buffer %lu bytes\n", vsock_buf_bytes); - printf("SO_RCVLOWAT %lu bytes\n", rcvlowat_bytes); + printf("vsock buffer %llu bytes\n", vsock_buf_bytes); + printf("SO_RCVLOWAT %d bytes\n", rcvlowat_bytes); fd = socket(AF_VSOCK, SOCK_STREAM, 0); @@ -251,6 +251,16 @@ static void run_receiver(unsigned long rcvlowat_bytes) close(fd); } +static void enable_so_zerocopy(int fd) +{ + int val = 1; + + if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) { + perror("setsockopt"); + exit(EXIT_FAILURE); + } +} + static void run_sender(int peer_cid, unsigned long to_send_bytes) { time_t tx_begin_ns; @@ -439,7 +449,7 @@ static long strtolx(const char *arg) int main(int argc, char **argv) { unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; - unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; + int rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; int peer_cid = -1; bool sender = false; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 8d38dbf8f41f..1eebbc0d5f61 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -22,12 +22,17 @@ #include <signal.h> #include <sys/ioctl.h> #include <linux/sockios.h> +#include <linux/time64.h> #include "vsock_test_zerocopy.h" #include "timeout.h" #include "control.h" #include "util.h" +/* Basic messages for control_writeulong(), control_readulong() */ +#define CONTROL_CONTINUE 1 +#define CONTROL_DONE 0 + static void test_stream_connection_reset(const struct test_opts *opts) { union { @@ -429,7 +434,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) { - unsigned long sock_buf_size; + unsigned long long sock_buf_size; unsigned long remote_hash; unsigned long curr_hash; int fd; @@ -444,17 +449,13 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) sock_buf_size = SOCK_BUF_SIZE; - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)"); - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &sock_buf_size, sizeof(sock_buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); - exit(EXIT_FAILURE); - } + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); /* Ready to receive data. */ control_writeln("SRVREADY"); @@ -563,7 +564,7 @@ static time_t current_nsec(void) exit(EXIT_FAILURE); } - return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec; + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; } #define RCVTIMEO_TIMEOUT_SEC 1 @@ -586,10 +587,8 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) tv.tv_sec = RCVTIMEO_TIMEOUT_SEC; tv.tv_usec = 0; - if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *)&tv, sizeof(tv)) == -1) { - perror("setsockopt(SO_RCVTIMEO)"); - exit(EXIT_FAILURE); - } + setsockopt_timeval_check(fd, SOL_SOCKET, SO_RCVTIMEO, tv, + "setsockopt(SO_RCVTIMEO)"); read_enter_ns = current_nsec(); @@ -605,7 +604,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) } read_overhead_ns = current_nsec() - read_enter_ns - - 1000000000ULL * RCVTIMEO_TIMEOUT_SEC; + NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC; if (read_overhead_ns > READ_OVERHEAD_NSEC) { fprintf(stderr, @@ -634,7 +633,8 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) static void test_seqpacket_bigmsg_client(const struct test_opts *opts) { - unsigned long sock_buf_size; + unsigned long long sock_buf_size; + size_t buf_size; socklen_t len; void *data; int fd; @@ -655,13 +655,20 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts) sock_buf_size++; - data = malloc(sock_buf_size); + /* size_t can be < unsigned long long */ + buf_size = (size_t)sock_buf_size; + if (buf_size != sock_buf_size) { + fprintf(stderr, "Returned BUFFER_SIZE too large\n"); + exit(EXIT_FAILURE); + } + + data = malloc(buf_size); if (!data) { perror("malloc"); exit(EXIT_FAILURE); } - send_buf(fd, data, sock_buf_size, 0, -EMSGSIZE); + send_buf(fd, data, buf_size, 0, -EMSGSIZE); control_writeln("CLISENT"); @@ -835,7 +842,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) { - unsigned long lowat_val = RCVLOWAT_BUF_SIZE; + int lowat_val = RCVLOWAT_BUF_SIZE; char buf[RCVLOWAT_BUF_SIZE]; struct pollfd fds; short poll_flags; @@ -847,11 +854,8 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) exit(EXIT_FAILURE); } - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &lowat_val, sizeof(lowat_val))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + lowat_val, "setsockopt(SO_RCVLOWAT)"); control_expectln("SRVSENT"); @@ -1357,9 +1361,10 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt static void test_stream_credit_update_test(const struct test_opts *opts, bool low_rx_bytes_test) { - size_t recv_buf_size; + int recv_buf_size; struct pollfd fds; size_t buf_size; + unsigned long long sock_buf_size; void *buf; int fd; @@ -1371,11 +1376,12 @@ static void test_stream_credit_update_test(const struct test_opts *opts, buf_size = RCVLOWAT_CREDIT_UPD_BUF_SIZE; - if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, - &buf_size, sizeof(buf_size))) { - perror("setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); - exit(EXIT_FAILURE); - } + /* size_t can be < unsigned long long */ + sock_buf_size = buf_size; + + setsockopt_ull_check(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, + sock_buf_size, + "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)"); if (low_rx_bytes_test) { /* Set new SO_RCVLOWAT here. This enables sending credit @@ -1384,11 +1390,8 @@ static void test_stream_credit_update_test(const struct test_opts *opts, */ recv_buf_size = 1 + VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &recv_buf_size, sizeof(recv_buf_size))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); } /* Send one dummy byte here, because 'setsockopt()' above also @@ -1430,11 +1433,8 @@ static void test_stream_credit_update_test(const struct test_opts *opts, recv_buf_size++; /* Updating SO_RCVLOWAT will send credit update. */ - if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, - &recv_buf_size, sizeof(recv_buf_size))) { - perror("setsockopt(SO_RCVLOWAT)"); - exit(EXIT_FAILURE); - } + setsockopt_int_check(fd, SOL_SOCKET, SO_RCVLOWAT, + recv_buf_size, "setsockopt(SO_RCVLOWAT)"); } fds.fd = fd; @@ -1478,6 +1478,236 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) test_stream_credit_update_test(opts, false); } +/* The goal of test leak_acceptq is to stress the race between connect() and + * close(listener). Implementation of client/server loops boils down to: + * + * client server + * ------ ------ + * write(CONTINUE) + * expect(CONTINUE) + * listen() + * write(LISTENING) + * expect(LISTENING) + * connect() close() + */ +#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */ + +static void test_stream_leak_acceptq_client(const struct test_opts *opts) +{ + time_t tout; + int fd; + + tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; + do { + control_writeulong(CONTROL_CONTINUE); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd >= 0) + close(fd); + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_leak_acceptq_server(const struct test_opts *opts) +{ + int fd; + + while (control_readulong() == CONTROL_CONTINUE) { + fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + control_writeln("LISTENING"); + close(fd); + } +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts) +{ + struct pollfd fds = { 0 }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + send_byte(fd, 1, MSG_ZEROCOPY); + + fds.fd = fd; + fds.events = 0; + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_byte(fd, 1, 0); + vsock_wait_remote_close(fd); + close(fd); +} + +/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path, + * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb() + * by hitting net.core.optmem_max limit in sock_omalloc(), specifically + * + * vsock_connectible_sendmsg + * virtio_transport_stream_enqueue + * virtio_transport_send_pkt_info + * virtio_transport_init_zcopy_skb + * . msg_zerocopy_realloc + * . msg_zerocopy_alloc + * . sock_omalloc + * . sk_omem_alloc + size > sysctl_optmem_max + * return -ENOMEM + * + * We abuse the implementation detail of net/socket.c:____sys_sendmsg(). + * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to + * fetch user-provided control data. + * + * While this approach works for now, it relies on assumptions regarding the + * implementation and configuration (for example, order of net.core.optmem_max + * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more + * resilient testing could be implemented by leveraging the Fault injection + * framework (CONFIG_FAULT_INJECTION), e.g. + * + * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait + * client# echo 0 > /sys/kernel/debug/failslab/verbose + * + * void client(const struct test_opts *opts) + * { + * char buf[16]; + * int f, s, i; + * + * f = open("/proc/self/fail-nth", O_WRONLY); + * + * for (i = 1; i < 32; i++) { + * control_writeulong(CONTROL_CONTINUE); + * + * s = vsock_stream_connect(opts->peer_cid, opts->peer_port); + * enable_so_zerocopy_check(s); + * + * sprintf(buf, "%d", i); + * write(f, buf, strlen(buf)); + * + * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY); + * + * write(f, "0", 1); + * close(s); + * } + * + * control_writeulong(CONTROL_DONE); + * close(f); + * } + * + * void server(const struct test_opts *opts) + * { + * int fd; + * + * while (control_readulong() == CONTROL_CONTINUE) { + * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + * vsock_wait_remote_close(fd); + * close(fd); + * } + * } + * + * Refer to Documentation/fault-injection/fault-injection.rst. + */ +#define MAX_PAGE_ORDER 10 /* usually */ +#define PAGE_SIZE 4096 + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, ctl_len, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov; + char *chunk; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + ctl_len = optmem_max - 1; + if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) { + fprintf(stderr, "Try with net.core.optmem_max = 100000\n"); + exit(EXIT_FAILURE); + } + + chunk_size = CMSG_SPACE(ctl_len); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &(char){ 0 }; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = ctl_len; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1608,6 +1838,21 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unsent_bytes_client, .run_server = test_seqpacket_unsent_bytes_server, }, + { + .name = "SOCK_STREAM leak accept queue", + .run_client = test_stream_leak_acceptq_client, + .run_server = test_stream_leak_acceptq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_leak_errq_client, + .run_server = test_stream_msgzcopy_leak_errq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, {}, }; @@ -1649,6 +1894,11 @@ static const struct option longopts[] = { .val = 's', }, { + .name = "pick", + .has_arg = required_argument, + .val = 't', + }, + { .name = "help", .has_arg = no_argument, .val = '?', @@ -1685,6 +1935,8 @@ static void usage(void) " --peer-cid <cid> CID of the other side\n" " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" + " --pick <test_id> Test ID to execute selectively;\n" + " use multiple --pick options to select more tests\n" " --skip <test_id> Test ID to skip;\n" " use multiple --skip options to skip more tests\n", DEFAULT_PEER_PORT @@ -1741,6 +1993,10 @@ int main(int argc, char **argv) skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, optarg); break; + case 't': + pick_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c index 04c376b6937f..9d9a6cb9614a 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.c +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -162,7 +162,7 @@ static void test_client(const struct test_opts *opts, } if (test_data->so_zerocopy) - enable_so_zerocopy(fd); + enable_so_zerocopy_check(fd); iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c index 6c3e6f70c457..5c3078969659 100644 --- a/tools/testing/vsock/vsock_uring_test.c +++ b/tools/testing/vsock/vsock_uring_test.c @@ -73,7 +73,7 @@ static void vsock_io_uring_client(const struct test_opts *opts, } if (msg_zerocopy) - enable_so_zerocopy(fd); + enable_so_zerocopy_check(fd); iovec = alloc_test_iovec(test_data->vecs, test_data->vecs_cnt); diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c index 1a87a0a77f9f..022865da8e3c 100644 --- a/tools/thermal/thermometer/thermometer.c +++ b/tools/thermal/thermometer/thermometer.c @@ -259,6 +259,7 @@ static int thermometer_add_tz(const char *path, const char *name, int polling, { int fd; char tz_path[PATH_MAX]; + struct tz *tz; sprintf(tz_path, CLASS_THERMAL"/%s/temp", path); @@ -268,13 +269,13 @@ static int thermometer_add_tz(const char *path, const char *name, int polling, return -1; } - thermometer->tz = realloc(thermometer->tz, - sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); - if (!thermometer->tz) { + tz = realloc(thermometer->tz, sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); + if (!tz) { ERROR("Failed to allocate thermometer->tz\n"); return -1; } + thermometer->tz = tz; thermometer->tz[thermometer->nr_tz].fd_temp = fd; thermometer->tz[thermometer->nr_tz].name = strdup(name); thermometer->tz[thermometer->nr_tz].polling = polling; diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index b5878be36125..a6a7dee16622 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -32,8 +32,10 @@ DOCSRC := ../../../Documentation/tools/rtla/ FEATURE_TESTS := libtraceevent FEATURE_TESTS += libtracefs +FEATURE_TESTS += libcpupower FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs +FEATURE_DISPLAY += libcpupower ifeq ($(V),1) Q = diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 5f8c286712d4..92a6e12e42d3 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -43,6 +43,16 @@ else $(info libtracefs is missing. Please install libtracefs-dev/libtracefs-devel) endif +$(call feature_check,libcpupower) +ifeq ($(feature-libcpupower), 1) + $(call detected,CONFIG_LIBCPUPOWER) + CFLAGS += -DHAVE_LIBCPUPOWER_SUPPORT + EXTLIBS += -lcpupower +else + $(info libcpupower is missing, building without --deepest-idle-state support.) + $(info Please install libcpupower-dev/kernel-tools-libs-devel) +endif + ifeq ($(STOP_ERROR),1) $(error Please, check the errors above.) endif diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt index 4af3fd40f171..dd5621038c55 100644 --- a/tools/tracing/rtla/README.txt +++ b/tools/tracing/rtla/README.txt @@ -11,6 +11,7 @@ RTLA depends on the following libraries and tools: - libtracefs - libtraceevent + - libcpupower (optional, for --deepest-idle-state) It also depends on python3-docutils to compile man pages. @@ -26,6 +27,9 @@ For development, we suggest the following steps for compiling rtla: $ make $ sudo make install $ cd .. + $ cd $libcpupower_src + $ make + $ sudo make install $ cd $rtla_src $ make $ sudo make install diff --git a/tools/tracing/rtla/sample/timerlat_load.py b/tools/tracing/rtla/sample/timerlat_load.py index 8cc5eb2d2e69..a819c3588073 100644 --- a/tools/tracing/rtla/sample/timerlat_load.py +++ b/tools/tracing/rtla/sample/timerlat_load.py @@ -25,50 +25,54 @@ import sys import os parser = argparse.ArgumentParser(description='user-space timerlat thread in Python') -parser.add_argument("cpu", help='CPU to run timerlat thread') -parser.add_argument("-p", "--prio", help='FIFO priority') - +parser.add_argument("cpu", type=int, help='CPU to run timerlat thread') +parser.add_argument("-p", "--prio", type=int, help='FIFO priority') args = parser.parse_args() try: - affinity_mask = { int(args.cpu) } -except: - print("Invalid cpu: " + args.cpu) - exit(1) - -try: - os.sched_setaffinity(0, affinity_mask); -except: - print("Error setting affinity") - exit(1) + affinity_mask = {args.cpu} + os.sched_setaffinity(0, affinity_mask) +except Exception as e: + print(f"Error setting affinity: {e}") + sys.exit(1) -if (args.prio): +if args.prio: try: - param = os.sched_param(int(args.prio)) + param = os.sched_param(args.prio) os.sched_setscheduler(0, os.SCHED_FIFO, param) - except: - print("Error setting priority") - exit(1) + except Exception as e: + print(f"Error setting priority: {e}") + sys.exit(1) try: - timerlat_path = "/sys/kernel/tracing/osnoise/per_cpu/cpu" + args.cpu + "/timerlat_fd" + timerlat_path = f"/sys/kernel/tracing/osnoise/per_cpu/cpu{args.cpu}/timerlat_fd" timerlat_fd = open(timerlat_path, 'r') -except: +except PermissionError: + print("Permission denied. Please check your access rights.") + sys.exit(1) +except OSError: print("Error opening timerlat fd, did you run timerlat -U?") - exit(1) + sys.exit(1) try: - data_fd = open("/dev/full", 'r'); -except: - print("Error opening data fd") + data_fd = open("/dev/full", 'r') +except Exception as e: + print(f"Error opening data fd: {e}") + sys.exit(1) while True: try: timerlat_fd.read(1) - data_fd.read(20*1024*1024) - except: + data_fd.read(20 * 1024 * 1024) + except KeyboardInterrupt: print("Leaving") break + except IOError as e: + print(f"I/O error occurred: {e}") + break + except Exception as e: + print(f"Unexpected error: {e}") + break timerlat_fd.close() data_fd.close() diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 30e3853076a0..45647495ce3b 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -627,7 +627,7 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p auto_house_keeping(¶ms->monitored_cpus); } - if (isatty(1) && !params->quiet) + if (isatty(STDOUT_FILENO) && !params->quiet) params->pretty_output = 1; return 0; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index a3907c390d67..4403cc4eba30 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -55,6 +55,7 @@ struct timerlat_hist_params { int entries; int warmup; int buffer_size; + int deepest_idle_state; }; struct timerlat_hist_cpu { @@ -62,9 +63,9 @@ struct timerlat_hist_cpu { int *thread; int *user; - int irq_count; - int thread_count; - int user_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long min_irq; unsigned long long sum_irq; @@ -281,6 +282,21 @@ static void timerlat_hist_header(struct osnoise_tool *tool) } /* + * format_summary_value - format a line of summary value (min, max or avg) + * of hist data + */ +static void format_summary_value(struct trace_seq *seq, + int count, + unsigned long long val, + bool avg) +{ + if (count) + trace_seq_printf(seq, "%9llu ", avg ? val / count : val); + else + trace_seq_printf(seq, "%9c ", '-'); +} + +/* * timerlat_print_summary - print the summary of the hist data to the output */ static void @@ -304,15 +320,15 @@ timerlat_print_summary(struct timerlat_hist_params *params, continue; if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].irq_count); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].thread_count); if (params->user_hist) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].user_count); } trace_seq_printf(trace->seq, "\n"); @@ -327,29 +343,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].min_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].min_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].min_user, + false); } trace_seq_printf(trace->seq, "\n"); @@ -363,29 +373,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_irq / data->hist[cpu].irq_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].sum_irq, + true); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].sum_thread, + true); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_user / data->hist[cpu].user_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].sum_user, + true); } trace_seq_printf(trace->seq, "\n"); @@ -399,29 +403,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].max_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].max_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].max_user, + false); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -488,15 +486,15 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "count:"); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.irq_count); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.thread_count); if (params->user_hist) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.user_count); trace_seq_printf(trace->seq, "\n"); @@ -505,16 +503,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "min: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.min_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.min_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_user); + format_summary_value(trace->seq, + sum.user_count, + sum.min_user, + false); trace_seq_printf(trace->seq, "\n"); @@ -522,16 +526,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "avg: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_irq / sum.irq_count); + format_summary_value(trace->seq, + sum.irq_count, + sum.sum_irq, + true); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_thread / sum.thread_count); + format_summary_value(trace->seq, + sum.thread_count, + sum.sum_thread, + true); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_user / sum.user_count); + format_summary_value(trace->seq, + sum.user_count, + sum.sum_user, + true); trace_seq_printf(trace->seq, "\n"); @@ -539,16 +549,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "max: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.max_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.max_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_user); + format_summary_value(trace->seq, + sum.user_count, + sum.max_user, + false); trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -655,7 +671,7 @@ static void timerlat_hist_usage(char *usage) " [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u|-k]", - " [--warm-up s]", + " [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -695,6 +711,7 @@ static void timerlat_hist_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -732,6 +749,9 @@ static struct timerlat_hist_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; params->bucket_size = 1; @@ -772,13 +792,14 @@ static struct timerlat_hist_params {"dump-task", no_argument, 0, '\1'}, {"warm-up", required_argument, 0, '\2'}, {"trace-buffer-size", required_argument, 0, '\3'}, + {"deepest-idle-state", required_argument, 0, '\4'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3:", long_options, &option_index); /* detect the end of the options. */ @@ -960,6 +981,9 @@ static struct timerlat_hist_params case '\3': params->buffer_size = get_llong_from_str(optarg); break; + case '\4': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_hist_usage("Invalid option"); } @@ -1064,7 +1088,7 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param * If the user did not specify a type of thread, try user-threads first. * Fall back to kernel threads otherwise. */ - if (!params->kernel_workload && !params->user_workload) { + if (!params->kernel_workload && !params->user_hist) { retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); if (retval) { debug_msg("User-space interface detected, setting user-threads\n"); @@ -1152,6 +1176,7 @@ int timerlat_hist_main(int argc, char *argv[]) int return_value = 1; pthread_t timerlat_u; int retval; + int nr_cpus, i; params = timerlat_hist_parse_args(argc, argv); if (!params) @@ -1201,6 +1226,28 @@ int timerlat_hist_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1332,6 +1379,13 @@ out_hist: timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1340,6 +1394,7 @@ out_free: osnoise_destroy_tool(record); osnoise_destroy_tool(tool); free(params); + free_cpu_idle_disable_states(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 210b0f533534..059b468981e4 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -48,15 +48,16 @@ struct timerlat_top_params { int pretty_output; int warmup; int buffer_size; + int deepest_idle_state; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; }; struct timerlat_top_cpu { - int irq_count; - int thread_count; - int user_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long cur_irq; unsigned long long min_irq; @@ -280,7 +281,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) /* * Unless trace is being lost, IRQ counter is always the max. */ - trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + trace_seq_printf(s, "%3d #%-9llu |", cpu, cpu_data->irq_count); if (!cpu_data->irq_count) { trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); @@ -447,7 +448,7 @@ static void timerlat_top_usage(char *usage) "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -481,6 +482,7 @@ static void timerlat_top_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -518,6 +520,9 @@ static struct timerlat_top_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; @@ -550,6 +555,7 @@ static struct timerlat_top_params {"aa-only", required_argument, 0, '5'}, {"warm-up", required_argument, 0, '6'}, {"trace-buffer-size", required_argument, 0, '7'}, + {"deepest-idle-state", required_argument, 0, '8'}, {0, 0, 0, 0} }; @@ -726,6 +732,9 @@ static struct timerlat_top_params case '7': params->buffer_size = get_llong_from_str(optarg); break; + case '8': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_top_usage("Invalid option"); } @@ -830,7 +839,7 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * * If the user did not specify a type of thread, try user-threads first. * Fall back to kernel threads otherwise. */ - if (!params->kernel_workload && !params->user_workload) { + if (!params->kernel_workload && !params->user_top) { retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); if (retval) { debug_msg("User-space interface detected, setting user-threads\n"); @@ -850,7 +859,7 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } - if (isatty(1) && !params->quiet) + if (isatty(STDOUT_FILENO) && !params->quiet) params->pretty_output = 1; return 0; @@ -922,6 +931,7 @@ int timerlat_top_main(int argc, char *argv[]) int return_value = 1; char *max_lat; int retval; + int nr_cpus, i; params = timerlat_top_parse_args(argc, argv); if (!params) @@ -971,6 +981,28 @@ int timerlat_top_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1125,6 +1157,13 @@ out_top: timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1134,6 +1173,7 @@ out_free: osnoise_destroy_tool(record); osnoise_destroy_tool(top); free(params); + free_cpu_idle_disable_states(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 9ac71a66840c..4995d35cf3ec 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -4,6 +4,9 @@ */ #define _GNU_SOURCE +#ifdef HAVE_LIBCPUPOWER_SUPPORT +#include <cpuidle.h> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ #include <dirent.h> #include <stdarg.h> #include <stdlib.h> @@ -211,29 +214,25 @@ long parse_ns_duration(char *val) /* * This is a set of helper functions to use SCHED_DEADLINE. */ -#ifdef __x86_64__ -# define __NR_sched_setattr 314 -# define __NR_sched_getattr 315 -#elif __i386__ -# define __NR_sched_setattr 351 -# define __NR_sched_getattr 352 -#elif __arm__ -# define __NR_sched_setattr 380 -# define __NR_sched_getattr 381 -#elif __aarch64__ || __riscv -# define __NR_sched_setattr 274 -# define __NR_sched_getattr 275 -#elif __powerpc__ -# define __NR_sched_setattr 355 -# define __NR_sched_getattr 356 -#elif __s390x__ -# define __NR_sched_setattr 345 -# define __NR_sched_getattr 346 +#ifndef __NR_sched_setattr +# ifdef __x86_64__ +# define __NR_sched_setattr 314 +# elif __i386__ +# define __NR_sched_setattr 351 +# elif __arm__ +# define __NR_sched_setattr 380 +# elif __aarch64__ || __riscv +# define __NR_sched_setattr 274 +# elif __powerpc__ +# define __NR_sched_setattr 355 +# elif __s390x__ +# define __NR_sched_setattr 345 +# endif #endif #define SCHED_DEADLINE 6 -static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, +static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } @@ -243,7 +242,7 @@ int __set_sched_attr(int pid, struct sched_attr *attr) int flags = 0; int retval; - retval = sched_setattr(pid, attr, flags); + retval = syscall_sched_setattr(pid, attr, flags); if (retval < 0) { err_msg("Failed to set sched attributes to the pid %d: %s\n", pid, strerror(errno)); @@ -519,6 +518,153 @@ int set_cpu_dma_latency(int32_t latency) return fd; } +#ifdef HAVE_LIBCPUPOWER_SUPPORT +static unsigned int **saved_cpu_idle_disable_state; +static size_t saved_cpu_idle_disable_state_alloc_ctr; + +/* + * save_cpu_idle_state_disable - save disable for all idle states of a cpu + * + * Saves the current disable of all idle states of a cpu, to be subsequently + * restored via restore_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int save_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int nr_cpus; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (saved_cpu_idle_disable_state == NULL) { + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); + if (!saved_cpu_idle_disable_state) + return -1; + } + + saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + saved_cpu_idle_disable_state_alloc_ctr++; + + for (state = 0; state < nr_states; state++) { + disabled = cpuidle_is_state_disabled(cpu, state); + if (disabled < 0) + return disabled; + saved_cpu_idle_disable_state[cpu][state] = disabled; + } + + return nr_states; +} + +/* + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu + * + * Restores the current disable state of all idle states of a cpu that was + * previously saved by save_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int restore_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int result; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (!saved_cpu_idle_disable_state) + return -1; + + for (state = 0; state < nr_states; state++) { + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + disabled = saved_cpu_idle_disable_state[cpu][state]; + result = cpuidle_state_disable(cpu, state, disabled); + if (result < 0) + return result; + } + + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + saved_cpu_idle_disable_state_alloc_ctr--; + if (saved_cpu_idle_disable_state_alloc_ctr == 0) { + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; + } + + return nr_states; +} + +/* + * free_cpu_idle_disable_states - free saved idle state disable for all cpus + * + * Frees the memory used for storing cpu idle state disable for all cpus + * and states. + * + * Normally, the memory is freed automatically in + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an + * error. + */ +void free_cpu_idle_disable_states(void) +{ + int cpu; + int nr_cpus; + + if (!saved_cpu_idle_disable_state) + return; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (cpu = 0; cpu < nr_cpus; cpu++) { + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + } + + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; +} + +/* + * set_deepest_cpu_idle_state - limit idle state of cpu + * + * Disables all idle states deeper than the one given in + * deepest_state (assuming states with higher number are deeper). + * + * This is used to reduce the exit from idle latency. Unlike + * set_cpu_dma_latency, it can disable idle states per cpu. + * + * Return: idle state count on success, negative on error + */ +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) +{ + unsigned int nr_states; + unsigned int state; + int result; + + nr_states = cpuidle_state_count(cpu); + + for (state = deepest_state + 1; state < nr_states; state++) { + result = cpuidle_state_disable(cpu, state, 1); + if (result < 0) + return result; + } + + return nr_states; +} +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ + #define _STR(x) #x #define STR(x) _STR(x) diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index d44513e6c66a..101d4799a009 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -46,6 +46,7 @@ update_sum(unsigned long long *a, unsigned long long *b) *a += *b; } +#ifndef SCHED_ATTR_SIZE_VER0 struct sched_attr { uint32_t size; uint32_t sched_policy; @@ -56,6 +57,7 @@ struct sched_attr { uint64_t sched_deadline; uint64_t sched_period; }; +#endif /* SCHED_ATTR_SIZE_VER0 */ int parse_prio(char *arg, struct sched_attr *sched_param); int parse_cpu_set(char *cpu_list, cpu_set_t *set); @@ -64,6 +66,19 @@ int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); int set_comm_cgroup(const char *comm_prefix, const char *cgroup); int set_pid_cgroup(pid_t pid, const char *cgroup); int set_cpu_dma_latency(int32_t latency); +#ifdef HAVE_LIBCPUPOWER_SUPPORT +int save_cpu_idle_disable_state(unsigned int cpu); +int restore_cpu_idle_disable_state(unsigned int cpu); +void free_cpu_idle_disable_states(void); +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state); +static inline int have_libcpupower_support(void) { return 1; } +#else +static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline void free_cpu_idle_disable_states(void) { } +static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; } +static inline int have_libcpupower_support(void) { return 0; } +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ int auto_house_keeping(cpu_set_t *monitored_cpus); #define ns_to_usf(x) (((double)x/1000)) diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index baffeb960ff0..bdeb98baa8b0 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -29,11 +29,11 @@ class Automata: def __get_model_name(self): basename = ntpath.basename(self.__dot_path) - if basename.endswith(".dot") == False: + if not basename.endswith(".dot") and not basename.endswith(".gv"): print("not a dot file") raise Exception("not a dot file: %s" % self.__dot_path) - model_name = basename[0:-4] + model_name = ntpath.splitext(basename)[0] if model_name.__len__() == 0: raise Exception("not a dot file: %s" % self.__dot_path) @@ -68,9 +68,9 @@ class Automata: def __get_cursor_begin_events(self): cursor = 0 while self.__dot_lines[cursor].split()[0] != "{node": - cursor += 1 + cursor += 1 while self.__dot_lines[cursor].split()[0] == "{node": - cursor += 1 + cursor += 1 # skip initial state transition cursor += 1 return cursor @@ -94,11 +94,11 @@ class Automata: initial_state = state[7:] else: states.append(state) - if self.__dot_lines[cursor].__contains__("doublecircle") == True: + if "doublecircle" in self.__dot_lines[cursor]: final_states.append(state) has_final_states = True - if self.__dot_lines[cursor].__contains__("ellipse") == True: + if "ellipse" in self.__dot_lines[cursor]: final_states.append(state) has_final_states = True @@ -110,7 +110,7 @@ class Automata: # Insert the initial state at the bein og the states states.insert(0, initial_state) - if has_final_states == False: + if not has_final_states: final_states.append(initial_state) return states, initial_state, final_states @@ -120,7 +120,7 @@ class Automata: cursor = self.__get_cursor_begin_events() events = [] - while self.__dot_lines[cursor][1] == '"': + while self.__dot_lines[cursor].lstrip()[0] == '"': # transitions have the format: # "all_fired" -> "both_fired" [ label = "disable_irq" ]; # ------------ event is here ------------^^^^^ @@ -161,7 +161,7 @@ class Automata: # and we are back! Let's fill the matrix cursor = self.__get_cursor_begin_events() - while self.__dot_lines[cursor][1] == '"': + while self.__dot_lines[cursor].lstrip()[0] == '"': if self.__dot_lines[cursor].split()[1] == "->": line = self.__dot_lines[cursor].split() origin_state = line[0].replace('"','').replace(',','_') diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index f04479ecc96c..f2bbc75a76f4 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -332,7 +332,7 @@ static void ikm_print_header(struct trace_seq *s) * ikm_event_handler - callback to handle event events * * Called any time a rv:"monitor"_event events is generated. - * It parses and print event. + * It parses and prints event. */ static int ikm_event_handler(struct trace_seq *s, struct tep_record *record, @@ -384,7 +384,7 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record, * ikm_error_handler - callback to handle error events * * Called any time a rv:"monitor"_errors events is generated. - * It parses and print event. + * It parses and prints event. */ static int ikm_error_handler(struct trace_seq *s, struct tep_record *record, diff --git a/tools/verification/rv/src/trace.c b/tools/verification/rv/src/trace.c index 2c7deed47f8d..1b9f9bfa1893 100644 --- a/tools/verification/rv/src/trace.c +++ b/tools/verification/rv/src/trace.c @@ -81,7 +81,7 @@ void trace_instance_destroy(struct trace_instance *trace) } /** - * trace_instance_init - create an trace instance + * trace_instance_init - create a trace instance * * It is more than the tracefs instance, as it contains other * things required for the tracing, such as the local events and |