diff options
Diffstat (limited to 'tools')
235 files changed, 11126 insertions, 2198 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index f8129c624b07..f7ddd73a8c0f 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -198,6 +198,15 @@ struct kvm_arm_copy_mte_tags { __u64 reserved[2]; }; +/* + * Counter/Timer offset structure. Describe the virtual/physical offset. + * To be used with KVM_ARM_SET_COUNTER_OFFSET. + */ +struct kvm_arm_counter_offset { + __u64 counter_offset; + __u64 reserved; +}; + #define KVM_ARM_TAGS_TO_GUEST 0 #define KVM_ARM_TAGS_FROM_GUEST 1 @@ -372,6 +381,10 @@ enum { #endif }; +/* Device Control API on vm fd */ +#define KVM_ARM_VM_SMCCC_CTRL 0 +#define KVM_ARM_VM_SMCCC_FILTER 0 + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 @@ -411,6 +424,8 @@ enum { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2 +#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3 #define KVM_ARM_VCPU_PVTIME_CTRL 2 #define KVM_ARM_VCPU_PVTIME_IPA 0 @@ -469,6 +484,27 @@ enum { /* run->fail_entry.hardware_entry_failure_reason codes. */ #define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0) +enum kvm_smccc_filter_action { + KVM_SMCCC_FILTER_HANDLE = 0, + KVM_SMCCC_FILTER_DENY, + KVM_SMCCC_FILTER_FWD_TO_USER, + +#ifdef __KERNEL__ + NR_SMCCC_FILTER_ACTIONS +#endif +}; + +struct kvm_smccc_filter { + __u32 base; + __u32 nr_functions; + __u8 action; + __u8 pad[15]; +}; + +/* arm64-specific KVM_EXIT_HYPERCALL flags */ +#define KVM_HYPERCALL_EXIT_SMC (1U << 0) +#define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + #endif #endif /* __ARM_KVM_H__ */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b89005819cd5..cb8ca46213be 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -97,7 +97,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ +/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ @@ -226,10 +226,9 @@ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ #define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ #define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ @@ -307,14 +306,21 @@ #define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ #define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ #define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -331,6 +337,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ #define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ @@ -363,6 +370,7 @@ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ #define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -427,6 +435,13 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ + /* * BUG word(s) */ @@ -467,5 +482,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5dfa4fb76f4b..fafe9be7a6f4 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -75,6 +75,12 @@ # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) #endif +#ifdef CONFIG_ADDRESS_MASKING +# define DISABLE_LAM 0 +#else +# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -115,7 +121,7 @@ #define DISABLED_MASK10 0 #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ DISABLE_CALL_DEPTH_TRACKING) -#define DISABLED_MASK12 0 +#define DISABLED_MASK12 (DISABLE_LAM) #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ad35355ee43e..3aedae61af4f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -206,6 +206,8 @@ /* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ #define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT 2 +#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 7f467fe05d42..1a6a1f987949 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,4 +559,7 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ +/* x86-specific KVM_EXIT_HYPERCALL flags. */ +#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 500b96e71f18..e8d7ebbca1a4 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -16,8 +16,16 @@ #define ARCH_GET_XCOMP_GUEST_PERM 0x1024 #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 +#define ARCH_XCOMP_TILECFG 17 +#define ARCH_XCOMP_TILEDATA 18 + #define ARCH_MAP_VDSO_X32 0x2001 #define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_GET_UNTAG_MASK 0x4001 +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 +#define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 + #endif /* _ASM_X86_PRCTL_H */ diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index b8ddfc4c4ab0..bc48a4dabe5d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -2,6 +2,9 @@ #ifndef __NR_fork #define __NR_fork 2 #endif +#ifndef __NR_execve +#define __NR_execve 11 +#endif #ifndef __NR_getppid #define __NR_getppid 64 #endif diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index a91ac666f758..d055b82d22cc 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -10,13 +10,6 @@ .section .noinstr.text, "ax" /* - * We build a jump to memcpy_orig by default which gets NOPped out on - * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which - * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs - * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. - */ - -/* * memcpy - Copy a memory block. * * Input: @@ -26,17 +19,21 @@ * * Output: * rax original destination + * + * The FSRM alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep movsb' itself is small enough to replace the call, but the + * two register moves blow up the code. And one of them is "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ SYM_TYPED_FUNC_START(__memcpy) - ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS + ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM movq %rdi, %rax movq %rdx, %rcx - shrq $3, %rcx - andl $7, %edx - rep movsq - movl %edx, %ecx rep movsb RET SYM_FUNC_END(__memcpy) @@ -45,17 +42,6 @@ EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) -/* - * memcpy_erms() - enhanced fast string memcpy. This is faster and - * simpler than memcpy. Use memcpy_erms when possible. - */ -SYM_FUNC_START_LOCAL(memcpy_erms) - movq %rdi, %rax - movq %rdx, %rcx - rep movsb - RET -SYM_FUNC_END(memcpy_erms) - SYM_FUNC_START_LOCAL(memcpy_orig) movq %rdi, %rax diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 6143b1a6fa2c..7c59a704c458 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -18,27 +18,22 @@ * rdx count (bytes) * * rax original destination + * + * The FSRS alternative should be done inline (avoiding the call and + * the disgusting return handling), but that would require some help + * from the compiler for better calling conventions. + * + * The 'rep stosb' itself is small enough to replace the call, but all + * the register moves blow up the code. And two of them are "needed" + * only for the return value that is the same as the source input, + * which the compiler could/should do much better anyway. */ SYM_FUNC_START(__memset) - /* - * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended - * to use it when possible. If not available, use fast string instructions. - * - * Otherwise, use original memset function. - */ - ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memset_erms", X86_FEATURE_ERMS + ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 + movb %sil,%al movq %rdx,%rcx - andl $7,%edx - shrq $3,%rcx - /* expand byte value */ - movzbl %sil,%esi - movabs $0x0101010101010101,%rax - imulq %rsi,%rax - rep stosq - movl %edx,%ecx rep stosb movq %r9,%rax RET @@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS(memset, __memset) EXPORT_SYMBOL(memset) -/* - * ISO C memset - set a memory block to a byte value. This function uses - * enhanced rep stosb to override the fast string function. - * The code is simpler and shorter than the fast string function as well. - * - * rdi destination - * rsi value (char) - * rdx count (bytes) - * - * rax original destination - */ -SYM_FUNC_START_LOCAL(memset_erms) - movq %rdi,%r9 - movb %sil,%al - movq %rdx,%rcx - rep stosb - movq %r9,%rax - RET -SYM_FUNC_END(memset_erms) - SYM_FUNC_START_LOCAL(memset_orig) movq %rdi,%r10 diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 11250c4734fe..3b7ba037af95 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -28,7 +28,7 @@ MAP COMMANDS | **bpftool** **map** { **show** | **list** } [*MAP*] | **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ | **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ -| [**dev** *NAME*] +| [**offload_dev** *NAME*] | **bpftool** **map dump** *MAP* | **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] | **bpftool** **map lookup** *MAP* [**key** *DATA*] @@ -73,7 +73,7 @@ DESCRIPTION maps. On such kernels bpftool will automatically emit this information as well. - **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*] + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**offload_dev** *NAME*] Create a new map with given parameters and pin it to *bpffs* as *FILE*. @@ -86,8 +86,8 @@ DESCRIPTION kernel needs it to collect metadata related to the inner maps that the new map will work with. - Keyword **dev** expects a network interface name, and is used - to request hardware offload for the map. + Keyword **offload_dev** expects a network interface name, + and is used to request hardware offload for the map. **bpftool map dump** *MAP* Dump all entries in a given *MAP*. In case of **name**, diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 9443c524bb76..dcae81bd27ed 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -31,7 +31,7 @@ PROG COMMANDS | **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] | **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] +| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** @@ -129,7 +129,7 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] Load bpf program(s) from binary *OBJ* and pin as *PATH*. **bpftool prog load** pins only the first program from the *OBJ* as *PATH*. **bpftool prog loadall** pins all programs @@ -143,8 +143,11 @@ DESCRIPTION to be replaced in the ELF file counting from 0, while *NAME* allows to replace a map by name. *MAP* specifies the map to use, referring to it by **id** or through a **pinned** file. - If **dev** *NAME* is specified program will be loaded onto - given networking device (offload). + If **offload_dev** *NAME* is specified program will be loaded + onto given networking device (offload). + If **xdpmeta_dev** *NAME* is specified program will become + device-bound without offloading, this facilitates access + to XDP metadata. Optional **pinmaps** argument can be provided to pin all maps under *MAP_DIR* directory. diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index e7234d1a5306..085bf18f3659 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -278,7 +278,7 @@ _bpftool() _bpftool_get_prog_tags return 0 ;; - dev) + dev|offload_dev|xdpmeta_dev) _sysfs_get_netdevs return 0 ;; @@ -508,7 +508,8 @@ _bpftool() ;; *) COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) - _bpftool_once_attr 'type dev pinmaps autoattach' + _bpftool_once_attr 'type pinmaps autoattach' + _bpftool_one_of_list 'offload_dev xdpmeta_dev' return 0 ;; esac @@ -733,7 +734,7 @@ _bpftool() esac ;; *) - _bpftool_once_attr 'type key value entries name flags dev' + _bpftool_once_attr 'type key value entries name flags offload_dev' if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then _bpftool_once_attr 'inner_map' fi diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 1360c82ae732..cc6e6aae2447 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -68,7 +68,7 @@ void p_info(const char *fmt, ...) va_end(ap); } -static bool is_bpffs(char *path) +static bool is_bpffs(const char *path) { struct statfs st_fs; @@ -244,13 +244,16 @@ int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) return fd; } -int mount_bpffs_for_pin(const char *name) +int mount_bpffs_for_pin(const char *name, bool is_dir) { char err_str[ERR_MAX_LEN]; char *file; char *dir; int err = 0; + if (is_dir && is_bpffs(name)) + return err; + file = malloc(strlen(name) + 1); if (!file) { p_err("mem alloc failed"); @@ -286,7 +289,7 @@ int do_pin_fd(int fd, const char *name) { int err; - err = mount_bpffs_for_pin(name); + err = mount_bpffs_for_pin(name, false); if (err) return err; diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index da16e6a27ccc..0675d6a46413 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -167,12 +167,12 @@ static int get_vendor_id(int ifindex) return strtol(buf, NULL, 0); } -static int read_procfs(const char *path) +static long read_procfs(const char *path) { char *endptr, *line = NULL; size_t len = 0; FILE *fd; - int res; + long res; fd = fopen(path, "r"); if (!fd) @@ -194,7 +194,7 @@ static int read_procfs(const char *path) static void probe_unprivileged_disabled(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -216,14 +216,14 @@ static void probe_unprivileged_disabled(void) printf("Unable to retrieve required privileges for bpf() syscall\n"); break; default: - printf("bpf() syscall restriction has unknown value %d\n", res); + printf("bpf() syscall restriction has unknown value %ld\n", res); } } } static void probe_jit_enable(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -245,7 +245,7 @@ static void probe_jit_enable(void) printf("Unable to retrieve JIT-compiler status\n"); break; default: - printf("JIT-compiler status has unknown value %d\n", + printf("JIT-compiler status has unknown value %ld\n", res); } } @@ -253,7 +253,7 @@ static void probe_jit_enable(void) static void probe_jit_harden(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -275,7 +275,7 @@ static void probe_jit_harden(void) printf("Unable to retrieve JIT hardening status\n"); break; default: - printf("JIT hardening status has unknown value %d\n", + printf("JIT hardening status has unknown value %ld\n", res); } } @@ -283,7 +283,7 @@ static void probe_jit_harden(void) static void probe_jit_kallsyms(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -302,14 +302,14 @@ static void probe_jit_kallsyms(void) printf("Unable to retrieve JIT kallsyms export status\n"); break; default: - printf("JIT kallsyms exports status has unknown value %d\n", res); + printf("JIT kallsyms exports status has unknown value %ld\n", res); } } } static void probe_jit_limit(void) { - int res; + long res; /* No support for C-style ouptut */ @@ -322,7 +322,7 @@ static void probe_jit_limit(void) printf("Unable to retrieve global memory limit for JIT compiler for unprivileged users\n"); break; default: - printf("Global memory limit for JIT compiler for unprivileged users is %d bytes\n", res); + printf("Global memory limit for JIT compiler for unprivileged users is %ld bytes\n", res); } } } diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 9a1d2365a297..6b0e5202ca7a 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -76,7 +76,7 @@ static int do_pin(int argc, char **argv) goto close_obj; } - err = mount_bpffs_for_pin(path); + err = mount_bpffs_for_pin(path, false); if (err) goto close_link; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index d98dbc50cf4c..2d786072ed0d 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -195,6 +195,8 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) show_link_attach_type_json(info->tracing.attach_type, json_wtr); + jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id); + jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); break; case BPF_LINK_TYPE_CGROUP: jsonw_lluint_field(json_wtr, "cgroup_id", @@ -212,7 +214,10 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_NETFILTER: netfilter_dump_json(info, json_wtr); break; - + case BPF_LINK_TYPE_STRUCT_OPS: + jsonw_uint_field(json_wtr, "map_id", + info->struct_ops.map_id); + break; default: break; } @@ -245,7 +250,10 @@ static void show_link_header_plain(struct bpf_link_info *info) else printf("type %u ", info->type); - printf("prog %u ", info->prog_id); + if (info->type == BPF_LINK_TYPE_STRUCT_OPS) + printf("map %u ", info->struct_ops.map_id); + else + printf("prog %u ", info->prog_id); } static void show_link_attach_type_plain(__u32 attach_type) @@ -369,6 +377,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tprog_type %u ", prog_info.type); show_link_attach_type_plain(info->tracing.attach_type); + if (info->tracing.target_obj_id || info->tracing.target_btf_id) + printf("\n\ttarget_obj_id %u target_btf_id %u ", + info->tracing.target_obj_id, + info->tracing.target_btf_id); break; case BPF_LINK_TYPE_CGROUP: printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index a49534d7eafa..b8bb08d10dec 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -142,7 +142,7 @@ const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); int open_obj_pinned(const char *path, bool quiet); int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); -int mount_bpffs_for_pin(const char *name); +int mount_bpffs_for_pin(const char *name, bool is_dir); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index aaeb8939e137..f98f7bbea2b1 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -139,6 +139,9 @@ static void print_entry_json(struct bpf_map_info *info, unsigned char *key, print_hex_data_json(key, info->key_size); jsonw_name(json_wtr, "value"); print_hex_data_json(value, info->value_size); + if (map_is_map_of_maps(info->type)) + jsonw_uint_field(json_wtr, "inner_map_id", + *(unsigned int *)value); if (btf) { struct btf_dumper d = { .btf = btf, @@ -259,8 +262,13 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, } if (info->value_size) { - printf("value:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, value, info->value_size, " "); + if (map_is_map_of_maps(info->type)) { + printf("inner_map_id:%c", break_names ? '\n' : ' '); + printf("%u ", *(unsigned int *)value); + } else { + printf("value:%c", break_names ? '\n' : ' '); + fprint_hex(stdout, value, info->value_size, " "); + } } printf("\n"); @@ -1279,6 +1287,11 @@ static int do_create(int argc, char **argv) "flags")) goto exit; } else if (is_prefix(*argv, "dev")) { + p_info("Warning: 'bpftool map create [...] dev <ifname>' syntax is deprecated.\n" + "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map."); + goto offload_dev; + } else if (is_prefix(*argv, "offload_dev")) { +offload_dev: NEXT_ARG(); if (attr.map_ifindex) { @@ -1423,7 +1436,7 @@ static int do_help(int argc, char **argv) "Usage: %1$s %2$s { show | list } [MAP]\n" " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" - " [inner_map MAP] [dev NAME]\n" + " [inner_map MAP] [offload_dev NAME]\n" " %1$s %2$s dump MAP\n" " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" " %1$s %2$s lookup MAP [key DATA]\n" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 91b6075b2db3..8443a149dd17 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1517,12 +1517,13 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; const char *pinmaps = NULL; + __u32 xdpmeta_ifindex = 0; + __u32 offload_ifindex = 0; bool auto_attach = false; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; unsigned int i, j; - __u32 ifindex = 0; const char *file; int idx, err; @@ -1614,17 +1615,46 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) map_replace[old_map_fds].fd = fd; old_map_fds++; } else if (is_prefix(*argv, "dev")) { + p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n" + "Going further, please use 'offload_dev <ifname>' to offload program to device.\n" + "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'."); + goto offload_dev; + } else if (is_prefix(*argv, "offload_dev")) { +offload_dev: NEXT_ARG(); - if (ifindex) { - p_err("offload device already specified"); + if (offload_ifindex) { + p_err("offload_dev already specified"); + goto err_free_reuse_maps; + } else if (xdpmeta_ifindex) { + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); + goto err_free_reuse_maps; + } + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + offload_ifindex = if_nametoindex(*argv); + if (!offload_ifindex) { + p_err("unrecognized netdevice '%s': %s", + *argv, strerror(errno)); + goto err_free_reuse_maps; + } + NEXT_ARG(); + } else if (is_prefix(*argv, "xdpmeta_dev")) { + NEXT_ARG(); + + if (xdpmeta_ifindex) { + p_err("xdpmeta_dev already specified"); + goto err_free_reuse_maps; + } else if (offload_ifindex) { + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); goto err_free_reuse_maps; } if (!REQ_ARGS(1)) goto err_free_reuse_maps; - ifindex = if_nametoindex(*argv); - if (!ifindex) { + xdpmeta_ifindex = if_nametoindex(*argv); + if (!xdpmeta_ifindex) { p_err("unrecognized netdevice '%s': %s", *argv, strerror(errno)); goto err_free_reuse_maps; @@ -1671,7 +1701,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - bpf_program__set_ifindex(pos, ifindex); + if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) { + bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY); + bpf_program__set_ifindex(pos, xdpmeta_ifindex); + } else { + bpf_program__set_ifindex(pos, offload_ifindex); + } if (bpf_program__type(pos) != prog_type) bpf_program__set_type(pos, prog_type); bpf_program__set_expected_attach_type(pos, expected_attach_type); @@ -1709,7 +1744,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) idx = 0; bpf_object__for_each_map(map, obj) { if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) - bpf_map__set_ifindex(map, ifindex); + bpf_map__set_ifindex(map, offload_ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { err = bpf_map__reuse_fd(map, map_replace[j++].fd); @@ -1739,7 +1774,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - err = mount_bpffs_for_pin(pinfile); + err = mount_bpffs_for_pin(pinfile, !first_prog_only); if (err) goto err_close_obj; @@ -2416,7 +2451,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n" " %1$s %2$s pin PROG FILE\n" " %1$s %2$s { load | loadall } OBJ PATH \\\n" - " [type TYPE] [dev NAME] \\\n" + " [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n" " [map { idx IDX | name NAME } MAP]\\\n" " [pinmaps MAP_DIR]\n" " [autoattach]\n" diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 57c3da70aa31..3ebc9fe91e0e 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -509,7 +509,7 @@ static int do_register(int argc, char **argv) if (argc == 1) linkdir = GET_ARG(); - if (linkdir && mount_bpffs_for_pin(linkdir)) { + if (linkdir && mount_bpffs_for_pin(linkdir, true)) { p_err("can't mount bpffs for pinning"); return -1; } diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index c61d061247e1..52a0be45410c 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -94,7 +94,7 @@ static void print_attributes(struct gpio_v2_line_info *info) for (i = 0; i < info->num_attrs; i++) { if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE) fprintf(stdout, ", debounce_period=%dusec", - info->attrs[0].debounce_period_us); + info->attrs[i].debounce_period_us); } } diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h index b54bd860dff6..7ce02a223732 100644 --- a/tools/include/asm/alternative.h +++ b/tools/include/asm/alternative.h @@ -4,7 +4,6 @@ /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ -#define altinstruction_entry # -#define ALTERNATIVE_2 # +#define ALTERNATIVE # #endif diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index cef3b1c25335..51ac441a37c3 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -21,19 +21,6 @@ */ #define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) -/* CoreSight trace ID is currently the bottom 7 bits of the value */ -#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) - -/* - * perf record will set the legacy meta data values as unused initially. - * This allows perf report to manage the decoders created when dynamic - * allocation in operation. - */ -#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) - -/* Value to set for unused trace ID values */ -#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F - /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 642808520d92..a87bbbbca2d4 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -972,6 +972,19 @@ extern "C" { #define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) #define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) #define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +/** + * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. + * + * GEM handles are not reference-counted by the kernel. User-space is + * responsible for managing their lifetime. For example, if user-space imports + * the same memory object twice on the same DRM file description, the same GEM + * handle is returned by both imports, and user-space needs to ensure + * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen + * when a memory object is allocated, then exported and imported again on the + * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception + * and always returns fresh new GEM handles even if an existing GEM handle + * already refers to the same memory object before the IOCTL is performed. + */ #define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) #define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) #define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) @@ -1012,7 +1025,37 @@ extern "C" { #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ #define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. + */ #define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) #define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) @@ -1104,8 +1147,13 @@ extern "C" { * struct as the output. * * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Planes are valid until one has a - * zero handle -- this can be used to compute the number of planes. + * will be filled with GEM buffer handles. Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. * * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid * until one has a zero &drm_mode_fb_cmd2.pitches. @@ -1113,6 +1161,11 @@ extern "C" { * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 8df261c5ab9b..dba7c5a5b25e 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2491,7 +2491,7 @@ struct i915_context_param_engines { #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ #define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ - struct i915_engine_class_instance engines[0]; + struct i915_engine_class_instance engines[]; } __attribute__((packed)); #define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ @@ -2676,6 +2676,10 @@ enum drm_i915_oa_format { I915_OAR_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_A24u40_A14u32_B8_C8, + /* MTL OAM */ + I915_OAM_FORMAT_MPEC8u64_B8_C8, + I915_OAM_FORMAT_MPEC8u32_B8_C8, + I915_OA_FORMAT_MAX /* non-ABI */ }; @@ -2758,6 +2762,25 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_POLL_OA_PERIOD, + /** + * Multiple engines may be mapped to the same OA unit. The OA unit is + * identified by class:instance of any engine mapped to it. + * + * This parameter specifies the engine class and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_CLASS, + + /** + * This parameter specifies the engine instance and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1bb11a6ee667..6961a7b70028 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1035,6 +1035,7 @@ enum bpf_attach_type { BPF_TRACE_KPROBE_MULTI, BPF_LSM_CGROUP, BPF_STRUCT_OPS, + BPF_NETFILTER, __MAX_BPF_ATTACH_TYPE }; @@ -1272,6 +1273,9 @@ enum { /* Create a map that will be registered/unregesitered by the backed bpf_link */ BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), }; /* Flags for BPF_PROG_QUERY. */ @@ -1420,6 +1424,13 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index af2a44c08683..a429381e7ca5 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,7 +28,7 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) -#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1) +#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 07a4cb149305..e682ab628dfa 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -162,6 +162,8 @@ struct in_addr { #define MCAST_MSFILTER 48 #define IP_MULTICAST_ALL 49 #define IP_UNICAST_IF 50 +#define IP_LOCAL_PORT_RANGE 51 +#define IP_PROTOCOL 52 #define MCAST_EXCLUDE 0 #define MCAST_INCLUDE 1 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 4003a166328c..737318b1c1d9 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -341,8 +341,13 @@ struct kvm_run { __u64 nr; __u64 args[6]; __u64 ret; - __u32 longmode; - __u32 pad; + + union { +#ifndef __KERNEL__ + __u32 longmode; +#endif + __u64 flags; + }; } hypercall; /* KVM_EXIT_TPR_ACCESS */ struct { @@ -1184,6 +1189,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226 +#define KVM_CAP_COUNTER_OFFSET 227 #ifdef KVM_CAP_IRQ_ROUTING @@ -1543,6 +1549,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) #define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) +/* Available with KVM_CAP_COUNTER_OFFSET */ +#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 759b3f53e53f..f23d9a16507f 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -290,6 +290,8 @@ struct prctl_mm_map { #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 +#define PR_GET_AUXV 0x41555856 + #define PR_SET_MEMORY_MERGE 67 #define PR_GET_MEMORY_MERGE 68 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index de6810e94abe..0aa955aa8246 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -429,9 +429,14 @@ struct snd_pcm_sw_params { snd_pcm_uframes_t avail_min; /* min avail frames for wakeup */ snd_pcm_uframes_t xfer_align; /* obsolete: xfer size need to be a multiple */ snd_pcm_uframes_t start_threshold; /* min hw_avail frames for automatic start */ - snd_pcm_uframes_t stop_threshold; /* min avail frames for automatic stop */ - snd_pcm_uframes_t silence_threshold; /* min distance from noise for silence filling */ - snd_pcm_uframes_t silence_size; /* silence block size */ + /* + * The following two thresholds alleviate playback buffer underruns; when + * hw_avail drops below the threshold, the respective action is triggered: + */ + snd_pcm_uframes_t stop_threshold; /* - stop playback */ + snd_pcm_uframes_t silence_threshold; /* - pre-fill buffer with silence */ + snd_pcm_uframes_t silence_size; /* max size of silence pre-fill; when >= boundary, + * fill played area with silence immediately */ snd_pcm_uframes_t boundary; /* pointers wrap point */ unsigned int proto; /* protocol version */ unsigned int tstamp_type; /* timestamp type (req. proto >= 2.0.12) */ @@ -570,7 +575,8 @@ struct __snd_pcm_mmap_status64 { struct __snd_pcm_mmap_control64 { __pad_before_uframe __pad1; snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */ - __pad_before_uframe __pad2; + __pad_before_uframe __pad2; // This should be __pad_after_uframe, but binary + // backwards compatibility constraints prevent a fix. __pad_before_uframe __pad3; snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 128ac723c4ea..ed86b37d8024 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -572,20 +572,30 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co (void *)keys, (void *)values, count, opts); } -int bpf_obj_pin(int fd, const char *pathname) +int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); union bpf_attr attr; int ret; + if (!OPTS_VALID(opts, bpf_obj_pin_opts)) + return libbpf_err(-EINVAL); + memset(&attr, 0, attr_sz); + attr.path_fd = OPTS_GET(opts, path_fd, 0); attr.pathname = ptr_to_u64((void *)pathname); + attr.file_flags = OPTS_GET(opts, file_flags, 0); attr.bpf_fd = fd; ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); return libbpf_err_errno(ret); } +int bpf_obj_pin(int fd, const char *pathname) +{ + return bpf_obj_pin_opts(fd, pathname, NULL); +} + int bpf_obj_get(const char *pathname) { return bpf_obj_get_opts(pathname, NULL); @@ -593,7 +603,7 @@ int bpf_obj_get(const char *pathname) int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); union bpf_attr attr; int fd; @@ -601,6 +611,7 @@ int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) return libbpf_err(-EINVAL); memset(&attr, 0, attr_sz); + attr.path_fd = OPTS_GET(opts, path_fd, 0); attr.pathname = ptr_to_u64((void *)pathname); attr.file_flags = OPTS_GET(opts, file_flags, 0); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index a2c091389b18..9aa0ee473754 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -284,16 +284,30 @@ LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values __u32 *count, const struct bpf_map_batch_opts *opts); -struct bpf_obj_get_opts { +struct bpf_obj_pin_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 file_flags; + int path_fd; size_t :0; }; -#define bpf_obj_get_opts__last_field file_flags +#define bpf_obj_pin_opts__last_field path_fd LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); +LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname, + const struct bpf_obj_pin_opts *opts); + +struct bpf_obj_get_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 file_flags; + int path_fd; + + size_t :0; +}; +#define bpf_obj_get_opts__last_field path_fd + LIBBPF_API int bpf_obj_get(const char *pathname); LIBBPF_API int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 929a3baca8ef..bbab9ad9dc5a 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -77,16 +77,21 @@ /* * Helper macros to manipulate data structures */ -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER) -#endif -#ifndef container_of + +/* offsetof() definition that uses __builtin_offset() might not preserve field + * offset CO-RE relocation properly, so force-redefine offsetof() using + * old-school approach which works with CO-RE correctly + */ +#undef offsetof +#define offsetof(type, member) ((unsigned long)&((type *)0)->member) + +/* redefined container_of() to ensure we use the above offsetof() macro */ +#undef container_of #define container_of(ptr, type, member) \ ({ \ void *__mptr = (void *)(ptr); \ ((type *)(__mptr - offsetof(type, member))); \ }) -#endif /* * Compiler (optimization) barrier. diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 6fb3d0f9af17..be076a4041ab 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -351,6 +351,7 @@ struct pt_regs___arm64 { * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions */ +/* riscv provides struct user_regs_struct instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) #define __PT_PARM1_REG a0 #define __PT_PARM2_REG a1 @@ -383,7 +384,7 @@ struct pt_regs___arm64 { * https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf */ -/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ +/* arc provides struct user_regs_struct instead of struct pt_regs to userspace */ #define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) #define __PT_PARM1_REG scratch.r0 #define __PT_PARM2_REG scratch.r1 diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 0a2c079244b6..8484b563b53d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1064,7 +1064,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) int err = 0; long sz; - f = fopen(path, "rb"); + f = fopen(path, "rbe"); if (!f) { err = -errno; goto err_out; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 580985ee5545..4d9f30bf7f01 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -2250,9 +2250,25 @@ static int btf_dump_type_data_check_overflow(struct btf_dump *d, const struct btf_type *t, __u32 id, const void *data, - __u8 bits_offset) + __u8 bits_offset, + __u8 bit_sz) { - __s64 size = btf__resolve_size(d->btf, id); + __s64 size; + + if (bit_sz) { + /* bits_offset is at most 7. bit_sz is at most 128. */ + __u8 nr_bytes = (bits_offset + bit_sz + 7) / 8; + + /* When bit_sz is non zero, it is called from + * btf_dump_struct_data() where it only cares about + * negative error value. + * Return nr_bytes in success case to make it + * consistent as the regular integer case below. + */ + return data + nr_bytes > d->typed_dump->data_end ? -E2BIG : nr_bytes; + } + + size = btf__resolve_size(d->btf, id); if (size < 0 || size >= INT_MAX) { pr_warn("unexpected size [%zu] for id [%u]\n", @@ -2407,7 +2423,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, { int size, err = 0; - size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); + size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset, bit_sz); if (size < 0) return size; err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 83e8e3bfd8ff..cf3323fd47b8 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -703,17 +703,17 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo /* obtain fd in BPF_REG_9 */ emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); - /* jump to fd_array store if fd denotes module BTF */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); - /* set the default value for off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip BTF fd store for vmlinux BTF */ - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); - /* store BTF fd in slot */ + /* store BTF fd in slot, 0 for vmlinux */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); + /* jump to insn[insn_idx].off store if fd denotes module BTF */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); + /* set the default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip BTF fd store for vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); /* store index into insn[insn_idx].off */ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); log: diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ad1ec893b41b..214f828ece6b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -117,6 +117,7 @@ static const char * const attach_type_name[] = { [BPF_PERF_EVENT] = "perf_event", [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", [BPF_STRUCT_OPS] = "struct_ops", + [BPF_NETFILTER] = "netfilter", }; static const char * const link_type_name[] = { @@ -1500,16 +1501,36 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return map; } -static size_t bpf_map_mmap_sz(const struct bpf_map *map) +static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) { - long page_sz = sysconf(_SC_PAGE_SIZE); + const long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; - map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = (size_t)roundup(value_sz, 8) * max_entries; map_sz = roundup(map_sz, page_sz); return map_sz; } +static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) +{ + void *mmaped; + + if (!map->mmaped) + return -EINVAL; + + if (old_sz == new_sz) + return 0; + + mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (mmaped == MAP_FAILED) + return -errno; + + memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); + munmap(map->mmaped, old_sz); + map->mmaped = mmaped; + return 0; +} + static char *internal_map_name(struct bpf_object *obj, const char *real_name) { char map_name[BPF_OBJ_NAME_LEN], *p; @@ -1608,6 +1629,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, { struct bpf_map_def *def; struct bpf_map *map; + size_t mmap_sz; int err; map = bpf_object__add_map(obj); @@ -1642,7 +1664,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", map->name, map->sec_idx, map->sec_offset, def->map_flags); - map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { err = -errno; @@ -4329,7 +4352,7 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); memset(info, 0, sizeof(*info)); - fp = fopen(file, "r"); + fp = fopen(file, "re"); if (!fp) { err = -errno; pr_warn("failed to open %s: %d. No procfs support?\n", file, @@ -4392,18 +4415,17 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) if (!new_name) return libbpf_err(-errno); - new_fd = open("/", O_RDONLY | O_CLOEXEC); + /* + * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. + * This is similar to what we do in ensure_good_fd(), but without + * closing original FD. + */ + new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); if (new_fd < 0) { err = -errno; goto err_free_new_name; } - new_fd = dup3(fd, new_fd, O_CLOEXEC); - if (new_fd < 0) { - err = -errno; - goto err_close_new_fd; - } - err = zclose(map->fd); if (err) { err = -errno; @@ -7433,7 +7455,7 @@ int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) int ret, err = 0; FILE *f; - f = fopen("/proc/kallsyms", "r"); + f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; pr_warn("failed to open /proc/kallsyms: %d\n", err); @@ -8294,7 +8316,10 @@ static void bpf_map__destroy(struct bpf_map *map) map->init_slots_sz = 0; if (map->mmaped) { - munmap(map->mmaped, bpf_map_mmap_sz(map)); + size_t mmap_sz; + + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + munmap(map->mmaped, mmap_sz); map->mmaped = NULL; } @@ -8712,7 +8737,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), - SEC_DEF("netfilter", NETFILTER, 0, SEC_NONE), + SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), }; static size_t custom_sec_def_cnt; @@ -9412,10 +9437,103 @@ __u32 bpf_map__value_size(const struct bpf_map *map) return map->def.value_size; } +static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) +{ + struct btf *btf; + struct btf_type *datasec_type, *var_type; + struct btf_var_secinfo *var; + const struct btf_type *array_type; + const struct btf_array *array; + int vlen, element_sz, new_array_id; + __u32 nr_elements; + + /* check btf existence */ + btf = bpf_object__btf(map->obj); + if (!btf) + return -ENOENT; + + /* verify map is datasec */ + datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); + if (!btf_is_datasec(datasec_type)) { + pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify datasec has at least one var */ + vlen = btf_vlen(datasec_type); + if (vlen == 0) { + pr_warn("map '%s': cannot be resized, map value datasec is empty\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify last var in the datasec is an array */ + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); + if (!btf_is_array(array_type)) { + pr_warn("map '%s': cannot be resized, last var must be an array\n", + bpf_map__name(map)); + return -EINVAL; + } + + /* verify request size aligns with array */ + array = btf_array(array_type); + element_sz = btf__resolve_size(btf, array->type); + if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { + pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", + bpf_map__name(map), element_sz, size); + return -EINVAL; + } + + /* create a new array based on the existing array, but with new length */ + nr_elements = (size - var->offset) / element_sz; + new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); + if (new_array_id < 0) + return new_array_id; + + /* adding a new btf type invalidates existing pointers to btf objects, + * so refresh pointers before proceeding + */ + datasec_type = btf_type_by_id(btf, map->btf_value_type_id); + var = &btf_var_secinfos(datasec_type)[vlen - 1]; + var_type = btf_type_by_id(btf, var->type); + + /* finally update btf info */ + datasec_type->size = size; + var->size = size - var->offset; + var_type->type = new_array_id; + + return 0; +} + int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { if (map->fd >= 0) return libbpf_err(-EBUSY); + + if (map->mmaped) { + int err; + size_t mmap_old_sz, mmap_new_sz; + + mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); + mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); + err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); + if (err) { + pr_warn("map '%s': failed to resize memory-mapped region: %d\n", + bpf_map__name(map), err); + return err; + } + err = map_btf_datasec_resize(map, size); + if (err && err != -ENOENT) { + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", + bpf_map__name(map), err); + map->btf_value_type_id = 0; + map->btf_key_type_id = 0; + } + } + map->def.value_size = size; return 0; } @@ -9441,7 +9559,7 @@ int bpf_map__set_initial_value(struct bpf_map *map, return 0; } -const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) { if (!map->mmaped) return NULL; @@ -9957,7 +10075,7 @@ static int parse_uint_from_file(const char *file, const char *fmt) int err, ret; FILE *f; - f = fopen(file, "r"); + f = fopen(file, "re"); if (!f) { err = -errno; pr_debug("failed to open '%s': %s\n", file, @@ -12693,7 +12811,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) for (i = 0; i < s->map_cnt; i++) { struct bpf_map *map = *s->maps[i].map; - size_t mmap_sz = bpf_map_mmap_sz(map); + size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); int prot, map_fd = bpf_map__fd(map); void **mmaped = s->maps[i].mmaped; @@ -12720,8 +12838,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) * as per normal clean up procedure, so we don't need to worry * about it from skeleton's clean up perspective. */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, - MAP_SHARED | MAP_FIXED, map_fd, 0); + *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); if (*mmaped == MAP_FAILED) { err = -errno; *mmaped = NULL; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0b7362397ea3..754da73c643b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -869,8 +869,22 @@ LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); /* get/set map key size */ LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); -/* get/set map value size */ +/* get map value size */ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +/** + * @brief **bpf_map__set_value_size()** sets map value size. + * @param map the BPF map instance + * @return 0, on success; negative error, otherwise + * + * There is a special case for maps with associated memory-mapped regions, like + * the global data section maps (bss, data, rodata). When this function is used + * on such a map, the mapped region is resized. Afterward, an attempt is made to + * adjust the corresponding BTF info. This attempt is best-effort and can only + * succeed if the last variable of the data section map is an array. The array + * BTF type is replaced by a new BTF array type with a different length. + * Any previously existing pointers returned from bpf_map__initial_value() or + * corresponding data section skeleton pointer must be reinitialized. + */ LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); /* get map key/value BTF type IDs */ LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); @@ -884,7 +898,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); /** * @brief **bpf_map__is_internal()** tells the caller whether or not the diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index a5aa3a383d69..7521a2fb7626 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -391,3 +391,8 @@ LIBBPF_1.2.0 { bpf_map_get_info_by_fd; bpf_prog_get_info_by_fd; } LIBBPF_1.1.0; + +LIBBPF_1.3.0 { + global: + bpf_obj_pin_opts; +} LIBBPF_1.2.0; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 6065f408a59c..9c4db90b92b6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -38,7 +38,7 @@ static __u32 get_ubuntu_kernel_version(void) if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0) return 0; - f = fopen(ubuntu_kver_file, "r"); + f = fopen(ubuntu_kver_file, "re"); if (!f) return 0; @@ -180,7 +180,9 @@ static int probe_prog_load(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: + break; case BPF_PROG_TYPE_NETFILTER: + opts.expected_attach_type = BPF_NETFILTER; break; default: return -EOPNOTSUPP; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 1fd2eeac5cfc..290411ddb39e 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 2 +#define LIBBPF_MINOR_VERSION 3 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 086eef355ab3..f1a141555f08 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -466,7 +466,7 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, proceed: sprintf(line, "/proc/%d/maps", pid); - f = fopen(line, "r"); + f = fopen(line, "re"); if (!f) { err = -errno; pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", @@ -954,8 +954,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct spec_map_fd = bpf_map__fd(man->specs_map); ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); - /* TODO: perform path resolution similar to uprobe's */ - fd = open(path, O_RDONLY); + fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile new file mode 100644 index 000000000000..d664b36deb5b --- /dev/null +++ b/tools/net/ynl/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +SUBDIRS = lib generated samples + +all: $(SUBDIRS) + +$(SUBDIRS): + @if [ -f "$@/Makefile" ] ; then \ + $(MAKE) -C $@ ; \ + fi + +clean hardclean: + @for dir in $(SUBDIRS) ; do \ + if [ -f "$$dir/Makefile" ] ; then \ + $(MAKE) -C $$dir $@; \ + fi \ + done + +.PHONY: clean all $(SUBDIRS) diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile new file mode 100644 index 000000000000..8ce610910b8d --- /dev/null +++ b/tools/net/ynl/generated/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0 + +CC=gcc +CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ + -I../lib/ +ifeq ("$(DEBUG)","1") + CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan +endif + +TOOL:=../ynl-gen-c.py + +GENS:=devlink handshake fou netdev +SRCS=$(patsubst %,%-user.c,${GENS}) +HDRS=$(patsubst %,%-user.h,${GENS}) +OBJS=$(patsubst %,%-user.o,${GENS}) + +all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) regen + +protos.a: $(OBJS) + @echo -e "\tAR $@" + @ar rcs $@ $(OBJS) + +%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) + @echo -e "\tGEN $@" + @$(TOOL) --mode user --header --spec $< > $@ + +%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL) + @echo -e "\tGEN $@" + @$(TOOL) --mode user --source --spec $< > $@ + +%-user.o: %-user.c %-user.h + @echo -e "\tCC $@" + @$(COMPILE.c) -c -o $@ $< + +clean: + rm -f *.o + +hardclean: clean + rm -f *.c *.h *.a + +regen: + @../ynl-regen.sh + +.PHONY: all clean hardclean regen +.DEFAULT_GOAL: all diff --git a/tools/net/ynl/generated/devlink-user.c b/tools/net/ynl/generated/devlink-user.c new file mode 100644 index 000000000000..939bd45feaca --- /dev/null +++ b/tools/net/ynl/generated/devlink-user.c @@ -0,0 +1,721 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/devlink.yaml */ +/* YNL-GEN user source */ + +#include <stdlib.h> +#include <string.h> +#include "devlink-user.h" +#include "ynl.h" +#include <linux/devlink.h> + +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> + +/* Enums */ +static const char * const devlink_op_strmap[] = { + [3] = "get", + [DEVLINK_CMD_INFO_GET] = "info-get", +}; + +const char *devlink_op_str(int op) +{ + if (op < 0 || op >= (int)MNL_ARRAY_SIZE(devlink_op_strmap)) + return NULL; + return devlink_op_strmap[op]; +} + +/* Policies */ +struct ynl_policy_attr devlink_dl_info_version_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, +}; + +struct ynl_policy_nest devlink_dl_info_version_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_info_version_policy, +}; + +struct ynl_policy_attr devlink_dl_reload_stats_entry_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest devlink_dl_reload_stats_entry_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_reload_stats_entry_policy, +}; + +struct ynl_policy_attr devlink_dl_reload_act_stats_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, +}; + +struct ynl_policy_nest devlink_dl_reload_act_stats_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_reload_act_stats_policy, +}; + +struct ynl_policy_attr devlink_dl_reload_act_info_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, +}; + +struct ynl_policy_nest devlink_dl_reload_act_info_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_reload_act_info_policy, +}; + +struct ynl_policy_attr devlink_dl_reload_stats_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, +}; + +struct ynl_policy_nest devlink_dl_reload_stats_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_reload_stats_policy, +}; + +struct ynl_policy_attr devlink_dl_dev_stats_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, + [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, +}; + +struct ynl_policy_nest devlink_dl_dev_stats_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_dl_dev_stats_policy, +}; + +struct ynl_policy_attr devlink_policy[DEVLINK_ATTR_MAX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .name = "bus-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_DEV_NAME] = { .name = "dev-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_PORT_INDEX] = { .name = "port-index", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_INFO_DRIVER_NAME] = { .name = "info-driver-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_INFO_SERIAL_NUMBER] = { .name = "info-serial-number", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_INFO_VERSION_FIXED] = { .name = "info-version-fixed", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, + [DEVLINK_ATTR_INFO_VERSION_RUNNING] = { .name = "info-version-running", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, + [DEVLINK_ATTR_INFO_VERSION_STORED] = { .name = "info-version-stored", .type = YNL_PT_NEST, .nest = &devlink_dl_info_version_nest, }, + [DEVLINK_ATTR_INFO_VERSION_NAME] = { .name = "info-version-name", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_INFO_VERSION_VALUE] = { .name = "info-version-value", .type = YNL_PT_NUL_STR, }, + [DEVLINK_ATTR_RELOAD_FAILED] = { .name = "reload-failed", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RELOAD_ACTION] = { .name = "reload-action", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_DEV_STATS] = { .name = "dev-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_dev_stats_nest, }, + [DEVLINK_ATTR_RELOAD_STATS] = { .name = "reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, + [DEVLINK_ATTR_RELOAD_STATS_ENTRY] = { .name = "reload-stats-entry", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_entry_nest, }, + [DEVLINK_ATTR_RELOAD_STATS_LIMIT] = { .name = "reload-stats-limit", .type = YNL_PT_U8, }, + [DEVLINK_ATTR_RELOAD_STATS_VALUE] = { .name = "reload-stats-value", .type = YNL_PT_U32, }, + [DEVLINK_ATTR_REMOTE_RELOAD_STATS] = { .name = "remote-reload-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_stats_nest, }, + [DEVLINK_ATTR_RELOAD_ACTION_INFO] = { .name = "reload-action-info", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_info_nest, }, + [DEVLINK_ATTR_RELOAD_ACTION_STATS] = { .name = "reload-action-stats", .type = YNL_PT_NEST, .nest = &devlink_dl_reload_act_stats_nest, }, +}; + +struct ynl_policy_nest devlink_nest = { + .max_attr = DEVLINK_ATTR_MAX, + .table = devlink_policy, +}; + +/* Common nested types */ +void devlink_dl_info_version_free(struct devlink_dl_info_version *obj) +{ + free(obj->info_version_name); + free(obj->info_version_value); +} + +int devlink_dl_info_version_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_info_version *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_INFO_VERSION_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.info_version_name_len = len; + dst->info_version_name = malloc(len + 1); + memcpy(dst->info_version_name, mnl_attr_get_str(attr), len); + dst->info_version_name[len] = 0; + } else if (type == DEVLINK_ATTR_INFO_VERSION_VALUE) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.info_version_value_len = len; + dst->info_version_value = malloc(len + 1); + memcpy(dst->info_version_value, mnl_attr_get_str(attr), len); + dst->info_version_value[len] = 0; + } + } + + return 0; +} + +void +devlink_dl_reload_stats_entry_free(struct devlink_dl_reload_stats_entry *obj) +{ +} + +int devlink_dl_reload_stats_entry_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_stats_entry *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_STATS_LIMIT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_stats_limit = 1; + dst->reload_stats_limit = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RELOAD_STATS_VALUE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_stats_value = 1; + dst->reload_stats_value = mnl_attr_get_u32(attr); + } + } + + return 0; +} + +void devlink_dl_reload_act_stats_free(struct devlink_dl_reload_act_stats *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_reload_stats_entry; i++) + devlink_dl_reload_stats_entry_free(&obj->reload_stats_entry[i]); + free(obj->reload_stats_entry); +} + +int devlink_dl_reload_act_stats_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_act_stats *dst = yarg->data; + unsigned int n_reload_stats_entry = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->reload_stats_entry) + return ynl_error_parse(yarg, "attribute already present (dl-reload-act-stats.reload-stats-entry)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { + n_reload_stats_entry++; + } + } + + if (n_reload_stats_entry) { + dst->reload_stats_entry = calloc(n_reload_stats_entry, sizeof(*dst->reload_stats_entry)); + dst->n_reload_stats_entry = n_reload_stats_entry; + i = 0; + parg.rsp_policy = &devlink_dl_reload_stats_entry_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_STATS_ENTRY) { + parg.data = &dst->reload_stats_entry[i]; + if (devlink_dl_reload_stats_entry_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_reload_act_info_free(struct devlink_dl_reload_act_info *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_reload_action_stats; i++) + devlink_dl_reload_act_stats_free(&obj->reload_action_stats[i]); + free(obj->reload_action_stats); +} + +int devlink_dl_reload_act_info_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_act_info *dst = yarg->data; + unsigned int n_reload_action_stats = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->reload_action_stats) + return ynl_error_parse(yarg, "attribute already present (dl-reload-act-info.reload-action-stats)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_ACTION) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_action = 1; + dst->reload_action = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RELOAD_ACTION_STATS) { + n_reload_action_stats++; + } + } + + if (n_reload_action_stats) { + dst->reload_action_stats = calloc(n_reload_action_stats, sizeof(*dst->reload_action_stats)); + dst->n_reload_action_stats = n_reload_action_stats; + i = 0; + parg.rsp_policy = &devlink_dl_reload_act_stats_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_STATS) { + parg.data = &dst->reload_action_stats[i]; + if (devlink_dl_reload_act_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_reload_stats_free(struct devlink_dl_reload_stats *obj) +{ + unsigned int i; + + for (i = 0; i < obj->n_reload_action_info; i++) + devlink_dl_reload_act_info_free(&obj->reload_action_info[i]); + free(obj->reload_action_info); +} + +int devlink_dl_reload_stats_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_reload_stats *dst = yarg->data; + unsigned int n_reload_action_info = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + parg.ys = yarg->ys; + + if (dst->reload_action_info) + return ynl_error_parse(yarg, "attribute already present (dl-reload-stats.reload-action-info)"); + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_ACTION_INFO) { + n_reload_action_info++; + } + } + + if (n_reload_action_info) { + dst->reload_action_info = calloc(n_reload_action_info, sizeof(*dst->reload_action_info)); + dst->n_reload_action_info = n_reload_action_info; + i = 0; + parg.rsp_policy = &devlink_dl_reload_act_info_nest; + mnl_attr_for_each_nested(attr, nested) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_RELOAD_ACTION_INFO) { + parg.data = &dst->reload_action_info[i]; + if (devlink_dl_reload_act_info_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return 0; +} + +void devlink_dl_dev_stats_free(struct devlink_dl_dev_stats *obj) +{ + devlink_dl_reload_stats_free(&obj->reload_stats); + devlink_dl_reload_stats_free(&obj->remote_reload_stats); +} + +int devlink_dl_dev_stats_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct devlink_dl_dev_stats *dst = yarg->data; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + parg.ys = yarg->ys; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_RELOAD_STATS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_stats = 1; + + parg.rsp_policy = &devlink_dl_reload_stats_nest; + parg.data = &dst->reload_stats; + if (devlink_dl_reload_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + } else if (type == DEVLINK_ATTR_REMOTE_RELOAD_STATS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.remote_reload_stats = 1; + + parg.rsp_policy = &devlink_dl_reload_stats_nest; + parg.data = &dst->remote_reload_stats; + if (devlink_dl_reload_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + } + } + + return 0; +} + +/* ============== DEVLINK_CMD_GET ============== */ +/* DEVLINK_CMD_GET - do */ +void devlink_get_req_free(struct devlink_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_get_rsp_free(struct devlink_get_rsp *rsp) +{ + free(rsp->bus_name); + free(rsp->dev_name); + devlink_dl_dev_stats_free(&rsp->dev_stats); + free(rsp); +} + +int devlink_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct devlink_get_rsp *dst; + const struct nlattr *attr; + struct ynl_parse_arg parg; + + dst = yarg->data; + parg.ys = yarg->ys; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_RELOAD_FAILED) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_failed = 1; + dst->reload_failed = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_RELOAD_ACTION) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.reload_action = 1; + dst->reload_action = mnl_attr_get_u8(attr); + } else if (type == DEVLINK_ATTR_DEV_STATS) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.dev_stats = 1; + + parg.rsp_policy = &devlink_dl_dev_stats_nest; + parg.data = &dst->dev_stats; + if (devlink_dl_dev_stats_parse(&parg, attr)) + return MNL_CB_ERROR; + } + } + + return MNL_CB_OK; +} + +struct devlink_get_rsp * +devlink_get(struct ynl_sock *ys, struct devlink_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_get_rsp_parse; + yrs.rsp_cmd = 3; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_get_rsp_free(rsp); + return NULL; +} + +/* DEVLINK_CMD_GET - dump */ +void devlink_get_list_free(struct devlink_get_list *rsp) +{ + struct devlink_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.bus_name); + free(rsp->obj.dev_name); + devlink_dl_dev_stats_free(&rsp->obj.dev_stats); + free(rsp); + } +} + +struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct devlink_get_list); + yds.cb = devlink_get_rsp_parse; + yds.rsp_cmd = 3; + yds.rsp_policy = &devlink_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, DEVLINK_CMD_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + devlink_get_list_free(yds.first); + return NULL; +} + +/* ============== DEVLINK_CMD_INFO_GET ============== */ +/* DEVLINK_CMD_INFO_GET - do */ +void devlink_info_get_req_free(struct devlink_info_get_req *req) +{ + free(req->bus_name); + free(req->dev_name); + free(req); +} + +void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp) +{ + unsigned int i; + + free(rsp->bus_name); + free(rsp->dev_name); + free(rsp->info_driver_name); + free(rsp->info_serial_number); + for (i = 0; i < rsp->n_info_version_fixed; i++) + devlink_dl_info_version_free(&rsp->info_version_fixed[i]); + free(rsp->info_version_fixed); + for (i = 0; i < rsp->n_info_version_running; i++) + devlink_dl_info_version_free(&rsp->info_version_running[i]); + free(rsp->info_version_running); + for (i = 0; i < rsp->n_info_version_stored; i++) + devlink_dl_info_version_free(&rsp->info_version_stored[i]); + free(rsp->info_version_stored); + free(rsp); +} + +int devlink_info_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + unsigned int n_info_version_running = 0; + unsigned int n_info_version_stored = 0; + unsigned int n_info_version_fixed = 0; + struct ynl_parse_arg *yarg = data; + struct devlink_info_get_rsp *dst; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + dst = yarg->data; + parg.ys = yarg->ys; + + if (dst->info_version_fixed) + return ynl_error_parse(yarg, "attribute already present (devlink.info-version-fixed)"); + if (dst->info_version_running) + return ynl_error_parse(yarg, "attribute already present (devlink.info-version-running)"); + if (dst->info_version_stored) + return ynl_error_parse(yarg, "attribute already present (devlink.info-version-stored)"); + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == DEVLINK_ATTR_BUS_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.bus_name_len = len; + dst->bus_name = malloc(len + 1); + memcpy(dst->bus_name, mnl_attr_get_str(attr), len); + dst->bus_name[len] = 0; + } else if (type == DEVLINK_ATTR_DEV_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.dev_name_len = len; + dst->dev_name = malloc(len + 1); + memcpy(dst->dev_name, mnl_attr_get_str(attr), len); + dst->dev_name[len] = 0; + } else if (type == DEVLINK_ATTR_INFO_DRIVER_NAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.info_driver_name_len = len; + dst->info_driver_name = malloc(len + 1); + memcpy(dst->info_driver_name, mnl_attr_get_str(attr), len); + dst->info_driver_name[len] = 0; + } else if (type == DEVLINK_ATTR_INFO_SERIAL_NUMBER) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.info_serial_number_len = len; + dst->info_serial_number = malloc(len + 1); + memcpy(dst->info_serial_number, mnl_attr_get_str(attr), len); + dst->info_serial_number[len] = 0; + } else if (type == DEVLINK_ATTR_INFO_VERSION_FIXED) { + n_info_version_fixed++; + } else if (type == DEVLINK_ATTR_INFO_VERSION_RUNNING) { + n_info_version_running++; + } else if (type == DEVLINK_ATTR_INFO_VERSION_STORED) { + n_info_version_stored++; + } + } + + if (n_info_version_fixed) { + dst->info_version_fixed = calloc(n_info_version_fixed, sizeof(*dst->info_version_fixed)); + dst->n_info_version_fixed = n_info_version_fixed; + i = 0; + parg.rsp_policy = &devlink_dl_info_version_nest; + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_FIXED) { + parg.data = &dst->info_version_fixed[i]; + if (devlink_dl_info_version_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + if (n_info_version_running) { + dst->info_version_running = calloc(n_info_version_running, sizeof(*dst->info_version_running)); + dst->n_info_version_running = n_info_version_running; + i = 0; + parg.rsp_policy = &devlink_dl_info_version_nest; + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_RUNNING) { + parg.data = &dst->info_version_running[i]; + if (devlink_dl_info_version_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + if (n_info_version_stored) { + dst->info_version_stored = calloc(n_info_version_stored, sizeof(*dst->info_version_stored)); + dst->n_info_version_stored = n_info_version_stored; + i = 0; + parg.rsp_policy = &devlink_dl_info_version_nest; + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == DEVLINK_ATTR_INFO_VERSION_STORED) { + parg.data = &dst->info_version_stored[i]; + if (devlink_dl_info_version_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + + return MNL_CB_OK; +} + +struct devlink_info_get_rsp * +devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct devlink_info_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, DEVLINK_CMD_INFO_GET, 1); + ys->req_policy = &devlink_nest; + yrs.yarg.rsp_policy = &devlink_nest; + + if (req->_present.bus_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_BUS_NAME, req->bus_name); + if (req->_present.dev_name_len) + mnl_attr_put_strz(nlh, DEVLINK_ATTR_DEV_NAME, req->dev_name); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = devlink_info_get_rsp_parse; + yrs.rsp_cmd = DEVLINK_CMD_INFO_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + devlink_info_get_rsp_free(rsp); + return NULL; +} + +const struct ynl_family ynl_devlink_family = { + .name = "devlink", +}; diff --git a/tools/net/ynl/generated/devlink-user.h b/tools/net/ynl/generated/devlink-user.h new file mode 100644 index 000000000000..a008b99b6e24 --- /dev/null +++ b/tools/net/ynl/generated/devlink-user.h @@ -0,0 +1,210 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/devlink.yaml */ +/* YNL-GEN user header */ + +#ifndef _LINUX_DEVLINK_GEN_H +#define _LINUX_DEVLINK_GEN_H + +#include <stdlib.h> +#include <string.h> +#include <linux/types.h> +#include <linux/devlink.h> + +struct ynl_sock; + +extern const struct ynl_family ynl_devlink_family; + +/* Enums */ +const char *devlink_op_str(int op); + +/* Common nested types */ +struct devlink_dl_info_version { + struct { + __u32 info_version_name_len; + __u32 info_version_value_len; + } _present; + + char *info_version_name; + char *info_version_value; +}; + +struct devlink_dl_reload_stats_entry { + struct { + __u32 reload_stats_limit:1; + __u32 reload_stats_value:1; + } _present; + + __u8 reload_stats_limit; + __u32 reload_stats_value; +}; + +struct devlink_dl_reload_act_stats { + unsigned int n_reload_stats_entry; + struct devlink_dl_reload_stats_entry *reload_stats_entry; +}; + +struct devlink_dl_reload_act_info { + struct { + __u32 reload_action:1; + } _present; + + __u8 reload_action; + unsigned int n_reload_action_stats; + struct devlink_dl_reload_act_stats *reload_action_stats; +}; + +struct devlink_dl_reload_stats { + unsigned int n_reload_action_info; + struct devlink_dl_reload_act_info *reload_action_info; +}; + +struct devlink_dl_dev_stats { + struct { + __u32 reload_stats:1; + __u32 remote_reload_stats:1; + } _present; + + struct devlink_dl_reload_stats reload_stats; + struct devlink_dl_reload_stats remote_reload_stats; +}; + +/* ============== DEVLINK_CMD_GET ============== */ +/* DEVLINK_CMD_GET - do */ +struct devlink_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_get_req *devlink_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_get_req)); +} +void devlink_get_req_free(struct devlink_get_req *req); + +static inline void +devlink_get_req_set_bus_name(struct devlink_get_req *req, const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_get_req_set_dev_name(struct devlink_get_req *req, const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 reload_failed:1; + __u32 reload_action:1; + __u32 dev_stats:1; + } _present; + + char *bus_name; + char *dev_name; + __u8 reload_failed; + __u8 reload_action; + struct devlink_dl_dev_stats dev_stats; +}; + +void devlink_get_rsp_free(struct devlink_get_rsp *rsp); + +/* + * Get devlink instances. + */ +struct devlink_get_rsp * +devlink_get(struct ynl_sock *ys, struct devlink_get_req *req); + +/* DEVLINK_CMD_GET - dump */ +struct devlink_get_list { + struct devlink_get_list *next; + struct devlink_get_rsp obj __attribute__ ((aligned (8))); +}; + +void devlink_get_list_free(struct devlink_get_list *rsp); + +struct devlink_get_list *devlink_get_dump(struct ynl_sock *ys); + +/* ============== DEVLINK_CMD_INFO_GET ============== */ +/* DEVLINK_CMD_INFO_GET - do */ +struct devlink_info_get_req { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + } _present; + + char *bus_name; + char *dev_name; +}; + +static inline struct devlink_info_get_req *devlink_info_get_req_alloc(void) +{ + return calloc(1, sizeof(struct devlink_info_get_req)); +} +void devlink_info_get_req_free(struct devlink_info_get_req *req); + +static inline void +devlink_info_get_req_set_bus_name(struct devlink_info_get_req *req, + const char *bus_name) +{ + free(req->bus_name); + req->_present.bus_name_len = strlen(bus_name); + req->bus_name = malloc(req->_present.bus_name_len + 1); + memcpy(req->bus_name, bus_name, req->_present.bus_name_len); + req->bus_name[req->_present.bus_name_len] = 0; +} +static inline void +devlink_info_get_req_set_dev_name(struct devlink_info_get_req *req, + const char *dev_name) +{ + free(req->dev_name); + req->_present.dev_name_len = strlen(dev_name); + req->dev_name = malloc(req->_present.dev_name_len + 1); + memcpy(req->dev_name, dev_name, req->_present.dev_name_len); + req->dev_name[req->_present.dev_name_len] = 0; +} + +struct devlink_info_get_rsp { + struct { + __u32 bus_name_len; + __u32 dev_name_len; + __u32 info_driver_name_len; + __u32 info_serial_number_len; + } _present; + + char *bus_name; + char *dev_name; + char *info_driver_name; + char *info_serial_number; + unsigned int n_info_version_fixed; + struct devlink_dl_info_version *info_version_fixed; + unsigned int n_info_version_running; + struct devlink_dl_info_version *info_version_running; + unsigned int n_info_version_stored; + struct devlink_dl_info_version *info_version_stored; +}; + +void devlink_info_get_rsp_free(struct devlink_info_get_rsp *rsp); + +/* + * Get device information, like driver name, hardware and firmware versions etc. + */ +struct devlink_info_get_rsp * +devlink_info_get(struct ynl_sock *ys, struct devlink_info_get_req *req); + +#endif /* _LINUX_DEVLINK_GEN_H */ diff --git a/tools/net/ynl/generated/fou-user.c b/tools/net/ynl/generated/fou-user.c new file mode 100644 index 000000000000..4271b5d43c58 --- /dev/null +++ b/tools/net/ynl/generated/fou-user.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/fou.yaml */ +/* YNL-GEN user source */ + +#include <stdlib.h> +#include <string.h> +#include "fou-user.h" +#include "ynl.h" +#include <linux/fou.h> + +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> + +/* Enums */ +static const char * const fou_op_strmap[] = { + [FOU_CMD_ADD] = "add", + [FOU_CMD_DEL] = "del", + [FOU_CMD_GET] = "get", +}; + +const char *fou_op_str(int op) +{ + if (op < 0 || op >= (int)MNL_ARRAY_SIZE(fou_op_strmap)) + return NULL; + return fou_op_strmap[op]; +} + +static const char * const fou_encap_type_strmap[] = { + [0] = "unspec", + [1] = "direct", + [2] = "gue", +}; + +const char *fou_encap_type_str(int value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(fou_encap_type_strmap)) + return NULL; + return fou_encap_type_strmap[value]; +} + +/* Policies */ +struct ynl_policy_attr fou_policy[FOU_ATTR_MAX + 1] = { + [FOU_ATTR_UNSPEC] = { .name = "unspec", .type = YNL_PT_REJECT, }, + [FOU_ATTR_PORT] = { .name = "port", .type = YNL_PT_U16, }, + [FOU_ATTR_AF] = { .name = "af", .type = YNL_PT_U8, }, + [FOU_ATTR_IPPROTO] = { .name = "ipproto", .type = YNL_PT_U8, }, + [FOU_ATTR_TYPE] = { .name = "type", .type = YNL_PT_U8, }, + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .name = "remcsum_nopartial", .type = YNL_PT_FLAG, }, + [FOU_ATTR_LOCAL_V4] = { .name = "local_v4", .type = YNL_PT_U32, }, + [FOU_ATTR_LOCAL_V6] = { .name = "local_v6", .type = YNL_PT_BINARY,}, + [FOU_ATTR_PEER_V4] = { .name = "peer_v4", .type = YNL_PT_U32, }, + [FOU_ATTR_PEER_V6] = { .name = "peer_v6", .type = YNL_PT_BINARY,}, + [FOU_ATTR_PEER_PORT] = { .name = "peer_port", .type = YNL_PT_U16, }, + [FOU_ATTR_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest fou_nest = { + .max_attr = FOU_ATTR_MAX, + .table = fou_policy, +}; + +/* Common nested types */ +/* ============== FOU_CMD_ADD ============== */ +/* FOU_CMD_ADD - do */ +void fou_add_req_free(struct fou_add_req *req) +{ + free(req->local_v6); + free(req->peer_v6); + free(req); +} + +int fou_add(struct ynl_sock *ys, struct fou_add_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_ADD, 1); + ys->req_policy = &fou_nest; + + if (req->_present.port) + mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); + if (req->_present.ipproto) + mnl_attr_put_u8(nlh, FOU_ATTR_IPPROTO, req->ipproto); + if (req->_present.type) + mnl_attr_put_u8(nlh, FOU_ATTR_TYPE, req->type); + if (req->_present.remcsum_nopartial) + mnl_attr_put(nlh, FOU_ATTR_REMCSUM_NOPARTIAL, 0, NULL); + if (req->_present.local_v4) + mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); + if (req->_present.peer_v4) + mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); + if (req->_present.local_v6_len) + mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); + if (req->_present.peer_v6_len) + mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); + if (req->_present.peer_port) + mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== FOU_CMD_DEL ============== */ +/* FOU_CMD_DEL - do */ +void fou_del_req_free(struct fou_del_req *req) +{ + free(req->local_v6); + free(req->peer_v6); + free(req); +} + +int fou_del(struct ynl_sock *ys, struct fou_del_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_DEL, 1); + ys->req_policy = &fou_nest; + + if (req->_present.af) + mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af); + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); + if (req->_present.port) + mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); + if (req->_present.peer_port) + mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); + if (req->_present.local_v4) + mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); + if (req->_present.peer_v4) + mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); + if (req->_present.local_v6_len) + mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); + if (req->_present.peer_v6_len) + mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +/* ============== FOU_CMD_GET ============== */ +/* FOU_CMD_GET - do */ +void fou_get_req_free(struct fou_get_req *req) +{ + free(req->local_v6); + free(req->peer_v6); + free(req); +} + +void fou_get_rsp_free(struct fou_get_rsp *rsp) +{ + free(rsp->local_v6); + free(rsp->peer_v6); + free(rsp); +} + +int fou_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + const struct nlattr *attr; + struct fou_get_rsp *dst; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == FOU_ATTR_PORT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.port = 1; + dst->port = mnl_attr_get_u16(attr); + } else if (type == FOU_ATTR_IPPROTO) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ipproto = 1; + dst->ipproto = mnl_attr_get_u8(attr); + } else if (type == FOU_ATTR_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.type = 1; + dst->type = mnl_attr_get_u8(attr); + } else if (type == FOU_ATTR_REMCSUM_NOPARTIAL) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.remcsum_nopartial = 1; + } else if (type == FOU_ATTR_LOCAL_V4) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.local_v4 = 1; + dst->local_v4 = mnl_attr_get_u32(attr); + } else if (type == FOU_ATTR_PEER_V4) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.peer_v4 = 1; + dst->peer_v4 = mnl_attr_get_u32(attr); + } else if (type == FOU_ATTR_LOCAL_V6) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = mnl_attr_get_payload_len(attr); + dst->_present.local_v6_len = len; + dst->local_v6 = malloc(len); + memcpy(dst->local_v6, mnl_attr_get_payload(attr), len); + } else if (type == FOU_ATTR_PEER_V6) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = mnl_attr_get_payload_len(attr); + dst->_present.peer_v6_len = len; + dst->peer_v6 = malloc(len); + memcpy(dst->peer_v6, mnl_attr_get_payload(attr), len); + } else if (type == FOU_ATTR_PEER_PORT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.peer_port = 1; + dst->peer_port = mnl_attr_get_u16(attr); + } else if (type == FOU_ATTR_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct fou_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, FOU_CMD_GET, 1); + ys->req_policy = &fou_nest; + yrs.yarg.rsp_policy = &fou_nest; + + if (req->_present.af) + mnl_attr_put_u8(nlh, FOU_ATTR_AF, req->af); + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, FOU_ATTR_IFINDEX, req->ifindex); + if (req->_present.port) + mnl_attr_put_u16(nlh, FOU_ATTR_PORT, req->port); + if (req->_present.peer_port) + mnl_attr_put_u16(nlh, FOU_ATTR_PEER_PORT, req->peer_port); + if (req->_present.local_v4) + mnl_attr_put_u32(nlh, FOU_ATTR_LOCAL_V4, req->local_v4); + if (req->_present.peer_v4) + mnl_attr_put_u32(nlh, FOU_ATTR_PEER_V4, req->peer_v4); + if (req->_present.local_v6_len) + mnl_attr_put(nlh, FOU_ATTR_LOCAL_V6, req->_present.local_v6_len, req->local_v6); + if (req->_present.peer_v6_len) + mnl_attr_put(nlh, FOU_ATTR_PEER_V6, req->_present.peer_v6_len, req->peer_v6); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = fou_get_rsp_parse; + yrs.rsp_cmd = FOU_CMD_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + fou_get_rsp_free(rsp); + return NULL; +} + +/* FOU_CMD_GET - dump */ +void fou_get_list_free(struct fou_get_list *rsp) +{ + struct fou_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp->obj.local_v6); + free(rsp->obj.peer_v6); + free(rsp); + } +} + +struct fou_get_list *fou_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct fou_get_list); + yds.cb = fou_get_rsp_parse; + yds.rsp_cmd = FOU_CMD_GET; + yds.rsp_policy = &fou_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, FOU_CMD_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + fou_get_list_free(yds.first); + return NULL; +} + +const struct ynl_family ynl_fou_family = { + .name = "fou", +}; diff --git a/tools/net/ynl/generated/fou-user.h b/tools/net/ynl/generated/fou-user.h new file mode 100644 index 000000000000..d8ab50579cd1 --- /dev/null +++ b/tools/net/ynl/generated/fou-user.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/fou.yaml */ +/* YNL-GEN user header */ + +#ifndef _LINUX_FOU_GEN_H +#define _LINUX_FOU_GEN_H + +#include <stdlib.h> +#include <string.h> +#include <linux/types.h> +#include <linux/fou.h> + +struct ynl_sock; + +extern const struct ynl_family ynl_fou_family; + +/* Enums */ +const char *fou_op_str(int op); +const char *fou_encap_type_str(int value); + +/* Common nested types */ +/* ============== FOU_CMD_ADD ============== */ +/* FOU_CMD_ADD - do */ +struct fou_add_req { + struct { + __u32 port:1; + __u32 ipproto:1; + __u32 type:1; + __u32 remcsum_nopartial:1; + __u32 local_v4:1; + __u32 peer_v4:1; + __u32 local_v6_len; + __u32 peer_v6_len; + __u32 peer_port:1; + __u32 ifindex:1; + } _present; + + __u16 port /* big-endian */; + __u8 ipproto; + __u8 type; + __u32 local_v4; + __u32 peer_v4; + void *local_v6; + void *peer_v6; + __u16 peer_port /* big-endian */; + __s32 ifindex; +}; + +static inline struct fou_add_req *fou_add_req_alloc(void) +{ + return calloc(1, sizeof(struct fou_add_req)); +} +void fou_add_req_free(struct fou_add_req *req); + +static inline void +fou_add_req_set_port(struct fou_add_req *req, __u16 port /* big-endian */) +{ + req->_present.port = 1; + req->port = port; +} +static inline void +fou_add_req_set_ipproto(struct fou_add_req *req, __u8 ipproto) +{ + req->_present.ipproto = 1; + req->ipproto = ipproto; +} +static inline void fou_add_req_set_type(struct fou_add_req *req, __u8 type) +{ + req->_present.type = 1; + req->type = type; +} +static inline void fou_add_req_set_remcsum_nopartial(struct fou_add_req *req) +{ + req->_present.remcsum_nopartial = 1; +} +static inline void +fou_add_req_set_local_v4(struct fou_add_req *req, __u32 local_v4) +{ + req->_present.local_v4 = 1; + req->local_v4 = local_v4; +} +static inline void +fou_add_req_set_peer_v4(struct fou_add_req *req, __u32 peer_v4) +{ + req->_present.peer_v4 = 1; + req->peer_v4 = peer_v4; +} +static inline void +fou_add_req_set_local_v6(struct fou_add_req *req, const void *local_v6, + size_t len) +{ + free(req->local_v6); + req->local_v6 = malloc(req->_present.local_v6_len); + memcpy(req->local_v6, local_v6, req->_present.local_v6_len); +} +static inline void +fou_add_req_set_peer_v6(struct fou_add_req *req, const void *peer_v6, + size_t len) +{ + free(req->peer_v6); + req->peer_v6 = malloc(req->_present.peer_v6_len); + memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); +} +static inline void +fou_add_req_set_peer_port(struct fou_add_req *req, + __u16 peer_port /* big-endian */) +{ + req->_present.peer_port = 1; + req->peer_port = peer_port; +} +static inline void +fou_add_req_set_ifindex(struct fou_add_req *req, __s32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +/* + * Add port. + */ +int fou_add(struct ynl_sock *ys, struct fou_add_req *req); + +/* ============== FOU_CMD_DEL ============== */ +/* FOU_CMD_DEL - do */ +struct fou_del_req { + struct { + __u32 af:1; + __u32 ifindex:1; + __u32 port:1; + __u32 peer_port:1; + __u32 local_v4:1; + __u32 peer_v4:1; + __u32 local_v6_len; + __u32 peer_v6_len; + } _present; + + __u8 af; + __s32 ifindex; + __u16 port /* big-endian */; + __u16 peer_port /* big-endian */; + __u32 local_v4; + __u32 peer_v4; + void *local_v6; + void *peer_v6; +}; + +static inline struct fou_del_req *fou_del_req_alloc(void) +{ + return calloc(1, sizeof(struct fou_del_req)); +} +void fou_del_req_free(struct fou_del_req *req); + +static inline void fou_del_req_set_af(struct fou_del_req *req, __u8 af) +{ + req->_present.af = 1; + req->af = af; +} +static inline void +fou_del_req_set_ifindex(struct fou_del_req *req, __s32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} +static inline void +fou_del_req_set_port(struct fou_del_req *req, __u16 port /* big-endian */) +{ + req->_present.port = 1; + req->port = port; +} +static inline void +fou_del_req_set_peer_port(struct fou_del_req *req, + __u16 peer_port /* big-endian */) +{ + req->_present.peer_port = 1; + req->peer_port = peer_port; +} +static inline void +fou_del_req_set_local_v4(struct fou_del_req *req, __u32 local_v4) +{ + req->_present.local_v4 = 1; + req->local_v4 = local_v4; +} +static inline void +fou_del_req_set_peer_v4(struct fou_del_req *req, __u32 peer_v4) +{ + req->_present.peer_v4 = 1; + req->peer_v4 = peer_v4; +} +static inline void +fou_del_req_set_local_v6(struct fou_del_req *req, const void *local_v6, + size_t len) +{ + free(req->local_v6); + req->local_v6 = malloc(req->_present.local_v6_len); + memcpy(req->local_v6, local_v6, req->_present.local_v6_len); +} +static inline void +fou_del_req_set_peer_v6(struct fou_del_req *req, const void *peer_v6, + size_t len) +{ + free(req->peer_v6); + req->peer_v6 = malloc(req->_present.peer_v6_len); + memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); +} + +/* + * Delete port. + */ +int fou_del(struct ynl_sock *ys, struct fou_del_req *req); + +/* ============== FOU_CMD_GET ============== */ +/* FOU_CMD_GET - do */ +struct fou_get_req { + struct { + __u32 af:1; + __u32 ifindex:1; + __u32 port:1; + __u32 peer_port:1; + __u32 local_v4:1; + __u32 peer_v4:1; + __u32 local_v6_len; + __u32 peer_v6_len; + } _present; + + __u8 af; + __s32 ifindex; + __u16 port /* big-endian */; + __u16 peer_port /* big-endian */; + __u32 local_v4; + __u32 peer_v4; + void *local_v6; + void *peer_v6; +}; + +static inline struct fou_get_req *fou_get_req_alloc(void) +{ + return calloc(1, sizeof(struct fou_get_req)); +} +void fou_get_req_free(struct fou_get_req *req); + +static inline void fou_get_req_set_af(struct fou_get_req *req, __u8 af) +{ + req->_present.af = 1; + req->af = af; +} +static inline void +fou_get_req_set_ifindex(struct fou_get_req *req, __s32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} +static inline void +fou_get_req_set_port(struct fou_get_req *req, __u16 port /* big-endian */) +{ + req->_present.port = 1; + req->port = port; +} +static inline void +fou_get_req_set_peer_port(struct fou_get_req *req, + __u16 peer_port /* big-endian */) +{ + req->_present.peer_port = 1; + req->peer_port = peer_port; +} +static inline void +fou_get_req_set_local_v4(struct fou_get_req *req, __u32 local_v4) +{ + req->_present.local_v4 = 1; + req->local_v4 = local_v4; +} +static inline void +fou_get_req_set_peer_v4(struct fou_get_req *req, __u32 peer_v4) +{ + req->_present.peer_v4 = 1; + req->peer_v4 = peer_v4; +} +static inline void +fou_get_req_set_local_v6(struct fou_get_req *req, const void *local_v6, + size_t len) +{ + free(req->local_v6); + req->local_v6 = malloc(req->_present.local_v6_len); + memcpy(req->local_v6, local_v6, req->_present.local_v6_len); +} +static inline void +fou_get_req_set_peer_v6(struct fou_get_req *req, const void *peer_v6, + size_t len) +{ + free(req->peer_v6); + req->peer_v6 = malloc(req->_present.peer_v6_len); + memcpy(req->peer_v6, peer_v6, req->_present.peer_v6_len); +} + +struct fou_get_rsp { + struct { + __u32 port:1; + __u32 ipproto:1; + __u32 type:1; + __u32 remcsum_nopartial:1; + __u32 local_v4:1; + __u32 peer_v4:1; + __u32 local_v6_len; + __u32 peer_v6_len; + __u32 peer_port:1; + __u32 ifindex:1; + } _present; + + __u16 port /* big-endian */; + __u8 ipproto; + __u8 type; + __u32 local_v4; + __u32 peer_v4; + void *local_v6; + void *peer_v6; + __u16 peer_port /* big-endian */; + __s32 ifindex; +}; + +void fou_get_rsp_free(struct fou_get_rsp *rsp); + +/* + * Get tunnel info. + */ +struct fou_get_rsp *fou_get(struct ynl_sock *ys, struct fou_get_req *req); + +/* FOU_CMD_GET - dump */ +struct fou_get_list { + struct fou_get_list *next; + struct fou_get_rsp obj __attribute__ ((aligned (8))); +}; + +void fou_get_list_free(struct fou_get_list *rsp); + +struct fou_get_list *fou_get_dump(struct ynl_sock *ys); + +#endif /* _LINUX_FOU_GEN_H */ diff --git a/tools/net/ynl/generated/handshake-user.c b/tools/net/ynl/generated/handshake-user.c new file mode 100644 index 000000000000..7c67765daf90 --- /dev/null +++ b/tools/net/ynl/generated/handshake-user.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/handshake.yaml */ +/* YNL-GEN user source */ + +#include <stdlib.h> +#include <string.h> +#include "handshake-user.h" +#include "ynl.h" +#include <linux/handshake.h> + +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> + +/* Enums */ +static const char * const handshake_op_strmap[] = { + [HANDSHAKE_CMD_READY] = "ready", + [HANDSHAKE_CMD_ACCEPT] = "accept", + [HANDSHAKE_CMD_DONE] = "done", +}; + +const char *handshake_op_str(int op) +{ + if (op < 0 || op >= (int)MNL_ARRAY_SIZE(handshake_op_strmap)) + return NULL; + return handshake_op_strmap[op]; +} + +static const char * const handshake_handler_class_strmap[] = { + [0] = "none", + [1] = "tlshd", + [2] = "max", +}; + +const char *handshake_handler_class_str(enum handshake_handler_class value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_handler_class_strmap)) + return NULL; + return handshake_handler_class_strmap[value]; +} + +static const char * const handshake_msg_type_strmap[] = { + [0] = "unspec", + [1] = "clienthello", + [2] = "serverhello", +}; + +const char *handshake_msg_type_str(enum handshake_msg_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_msg_type_strmap)) + return NULL; + return handshake_msg_type_strmap[value]; +} + +static const char * const handshake_auth_strmap[] = { + [0] = "unspec", + [1] = "unauth", + [2] = "psk", + [3] = "x509", +}; + +const char *handshake_auth_str(enum handshake_auth value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(handshake_auth_strmap)) + return NULL; + return handshake_auth_strmap[value]; +} + +/* Policies */ +struct ynl_policy_attr handshake_x509_policy[HANDSHAKE_A_X509_MAX + 1] = { + [HANDSHAKE_A_X509_CERT] = { .name = "cert", .type = YNL_PT_U32, }, + [HANDSHAKE_A_X509_PRIVKEY] = { .name = "privkey", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest handshake_x509_nest = { + .max_attr = HANDSHAKE_A_X509_MAX, + .table = handshake_x509_policy, +}; + +struct ynl_policy_attr handshake_accept_policy[HANDSHAKE_A_ACCEPT_MAX + 1] = { + [HANDSHAKE_A_ACCEPT_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, }, + [HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = { .name = "handler-class", .type = YNL_PT_U32, }, + [HANDSHAKE_A_ACCEPT_MESSAGE_TYPE] = { .name = "message-type", .type = YNL_PT_U32, }, + [HANDSHAKE_A_ACCEPT_TIMEOUT] = { .name = "timeout", .type = YNL_PT_U32, }, + [HANDSHAKE_A_ACCEPT_AUTH_MODE] = { .name = "auth-mode", .type = YNL_PT_U32, }, + [HANDSHAKE_A_ACCEPT_PEER_IDENTITY] = { .name = "peer-identity", .type = YNL_PT_U32, }, + [HANDSHAKE_A_ACCEPT_CERTIFICATE] = { .name = "certificate", .type = YNL_PT_NEST, .nest = &handshake_x509_nest, }, + [HANDSHAKE_A_ACCEPT_PEERNAME] = { .name = "peername", .type = YNL_PT_NUL_STR, }, +}; + +struct ynl_policy_nest handshake_accept_nest = { + .max_attr = HANDSHAKE_A_ACCEPT_MAX, + .table = handshake_accept_policy, +}; + +struct ynl_policy_attr handshake_done_policy[HANDSHAKE_A_DONE_MAX + 1] = { + [HANDSHAKE_A_DONE_STATUS] = { .name = "status", .type = YNL_PT_U32, }, + [HANDSHAKE_A_DONE_SOCKFD] = { .name = "sockfd", .type = YNL_PT_U32, }, + [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .name = "remote-auth", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest handshake_done_nest = { + .max_attr = HANDSHAKE_A_DONE_MAX, + .table = handshake_done_policy, +}; + +/* Common nested types */ +void handshake_x509_free(struct handshake_x509 *obj) +{ +} + +int handshake_x509_parse(struct ynl_parse_arg *yarg, + const struct nlattr *nested) +{ + struct handshake_x509 *dst = yarg->data; + const struct nlattr *attr; + + mnl_attr_for_each_nested(attr, nested) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == HANDSHAKE_A_X509_CERT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.cert = 1; + dst->cert = mnl_attr_get_u32(attr); + } else if (type == HANDSHAKE_A_X509_PRIVKEY) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.privkey = 1; + dst->privkey = mnl_attr_get_u32(attr); + } + } + + return 0; +} + +/* ============== HANDSHAKE_CMD_ACCEPT ============== */ +/* HANDSHAKE_CMD_ACCEPT - do */ +void handshake_accept_req_free(struct handshake_accept_req *req) +{ + free(req); +} + +void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp) +{ + unsigned int i; + + free(rsp->peer_identity); + for (i = 0; i < rsp->n_certificate; i++) + handshake_x509_free(&rsp->certificate[i]); + free(rsp->certificate); + free(rsp->peername); + free(rsp); +} + +int handshake_accept_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct handshake_accept_rsp *dst; + unsigned int n_peer_identity = 0; + unsigned int n_certificate = 0; + const struct nlattr *attr; + struct ynl_parse_arg parg; + int i; + + dst = yarg->data; + parg.ys = yarg->ys; + + if (dst->certificate) + return ynl_error_parse(yarg, "attribute already present (accept.certificate)"); + if (dst->peer_identity) + return ynl_error_parse(yarg, "attribute already present (accept.peer-identity)"); + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == HANDSHAKE_A_ACCEPT_SOCKFD) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.sockfd = 1; + dst->sockfd = mnl_attr_get_u32(attr); + } else if (type == HANDSHAKE_A_ACCEPT_MESSAGE_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.message_type = 1; + dst->message_type = mnl_attr_get_u32(attr); + } else if (type == HANDSHAKE_A_ACCEPT_TIMEOUT) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.timeout = 1; + dst->timeout = mnl_attr_get_u32(attr); + } else if (type == HANDSHAKE_A_ACCEPT_AUTH_MODE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.auth_mode = 1; + dst->auth_mode = mnl_attr_get_u32(attr); + } else if (type == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) { + n_peer_identity++; + } else if (type == HANDSHAKE_A_ACCEPT_CERTIFICATE) { + n_certificate++; + } else if (type == HANDSHAKE_A_ACCEPT_PEERNAME) { + unsigned int len; + + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + + len = strnlen(mnl_attr_get_str(attr), mnl_attr_get_payload_len(attr)); + dst->_present.peername_len = len; + dst->peername = malloc(len + 1); + memcpy(dst->peername, mnl_attr_get_str(attr), len); + dst->peername[len] = 0; + } + } + + if (n_certificate) { + dst->certificate = calloc(n_certificate, sizeof(*dst->certificate)); + dst->n_certificate = n_certificate; + i = 0; + parg.rsp_policy = &handshake_x509_nest; + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_CERTIFICATE) { + parg.data = &dst->certificate[i]; + if (handshake_x509_parse(&parg, attr)) + return MNL_CB_ERROR; + i++; + } + } + } + if (n_peer_identity) { + dst->peer_identity = calloc(n_peer_identity, sizeof(*dst->peer_identity)); + dst->n_peer_identity = n_peer_identity; + i = 0; + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == HANDSHAKE_A_ACCEPT_PEER_IDENTITY) { + dst->peer_identity[i] = mnl_attr_get_u32(attr); + i++; + } + } + } + + return MNL_CB_OK; +} + +struct handshake_accept_rsp * +handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct handshake_accept_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_ACCEPT, 1); + ys->req_policy = &handshake_accept_nest; + yrs.yarg.rsp_policy = &handshake_accept_nest; + + if (req->_present.handler_class) + mnl_attr_put_u32(nlh, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, req->handler_class); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = handshake_accept_rsp_parse; + yrs.rsp_cmd = HANDSHAKE_CMD_ACCEPT; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + handshake_accept_rsp_free(rsp); + return NULL; +} + +/* HANDSHAKE_CMD_ACCEPT - notify */ +void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp) +{ + unsigned int i; + + free(rsp->obj.peer_identity); + for (i = 0; i < rsp->obj.n_certificate; i++) + handshake_x509_free(&rsp->obj.certificate[i]); + free(rsp->obj.certificate); + free(rsp->obj.peername); + free(rsp); +} + +/* ============== HANDSHAKE_CMD_DONE ============== */ +/* HANDSHAKE_CMD_DONE - do */ +void handshake_done_req_free(struct handshake_done_req *req) +{ + free(req->remote_auth); + free(req); +} + +int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req) +{ + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, HANDSHAKE_CMD_DONE, 1); + ys->req_policy = &handshake_done_nest; + + if (req->_present.status) + mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_STATUS, req->status); + if (req->_present.sockfd) + mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_SOCKFD, req->sockfd); + for (unsigned int i = 0; i < req->n_remote_auth; i++) + mnl_attr_put_u32(nlh, HANDSHAKE_A_DONE_REMOTE_AUTH, req->remote_auth[i]); + + err = ynl_exec(ys, nlh, NULL); + if (err < 0) + return -1; + + return 0; +} + +static const struct ynl_ntf_info handshake_ntf_info[] = { + [HANDSHAKE_CMD_READY] = { + .alloc_sz = sizeof(struct handshake_accept_ntf), + .cb = handshake_accept_rsp_parse, + .policy = &handshake_accept_nest, + .free = (void *)handshake_accept_ntf_free, + }, +}; + +const struct ynl_family ynl_handshake_family = { + .name = "handshake", + .ntf_info = handshake_ntf_info, + .ntf_info_size = MNL_ARRAY_SIZE(handshake_ntf_info), +}; diff --git a/tools/net/ynl/generated/handshake-user.h b/tools/net/ynl/generated/handshake-user.h new file mode 100644 index 000000000000..47646bb91cea --- /dev/null +++ b/tools/net/ynl/generated/handshake-user.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/handshake.yaml */ +/* YNL-GEN user header */ + +#ifndef _LINUX_HANDSHAKE_GEN_H +#define _LINUX_HANDSHAKE_GEN_H + +#include <stdlib.h> +#include <string.h> +#include <linux/types.h> +#include <linux/handshake.h> + +struct ynl_sock; + +extern const struct ynl_family ynl_handshake_family; + +/* Enums */ +const char *handshake_op_str(int op); +const char *handshake_handler_class_str(enum handshake_handler_class value); +const char *handshake_msg_type_str(enum handshake_msg_type value); +const char *handshake_auth_str(enum handshake_auth value); + +/* Common nested types */ +struct handshake_x509 { + struct { + __u32 cert:1; + __u32 privkey:1; + } _present; + + __u32 cert; + __u32 privkey; +}; + +/* ============== HANDSHAKE_CMD_ACCEPT ============== */ +/* HANDSHAKE_CMD_ACCEPT - do */ +struct handshake_accept_req { + struct { + __u32 handler_class:1; + } _present; + + enum handshake_handler_class handler_class; +}; + +static inline struct handshake_accept_req *handshake_accept_req_alloc(void) +{ + return calloc(1, sizeof(struct handshake_accept_req)); +} +void handshake_accept_req_free(struct handshake_accept_req *req); + +static inline void +handshake_accept_req_set_handler_class(struct handshake_accept_req *req, + enum handshake_handler_class handler_class) +{ + req->_present.handler_class = 1; + req->handler_class = handler_class; +} + +struct handshake_accept_rsp { + struct { + __u32 sockfd:1; + __u32 message_type:1; + __u32 timeout:1; + __u32 auth_mode:1; + __u32 peername_len; + } _present; + + __u32 sockfd; + enum handshake_msg_type message_type; + __u32 timeout; + enum handshake_auth auth_mode; + unsigned int n_peer_identity; + __u32 *peer_identity; + unsigned int n_certificate; + struct handshake_x509 *certificate; + char *peername; +}; + +void handshake_accept_rsp_free(struct handshake_accept_rsp *rsp); + +/* + * Handler retrieves next queued handshake request + */ +struct handshake_accept_rsp * +handshake_accept(struct ynl_sock *ys, struct handshake_accept_req *req); + +/* HANDSHAKE_CMD_ACCEPT - notify */ +struct handshake_accept_ntf { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct handshake_accept_ntf *ntf); + struct handshake_accept_rsp obj __attribute__ ((aligned (8))); +}; + +void handshake_accept_ntf_free(struct handshake_accept_ntf *rsp); + +/* ============== HANDSHAKE_CMD_DONE ============== */ +/* HANDSHAKE_CMD_DONE - do */ +struct handshake_done_req { + struct { + __u32 status:1; + __u32 sockfd:1; + } _present; + + __u32 status; + __u32 sockfd; + unsigned int n_remote_auth; + __u32 *remote_auth; +}; + +static inline struct handshake_done_req *handshake_done_req_alloc(void) +{ + return calloc(1, sizeof(struct handshake_done_req)); +} +void handshake_done_req_free(struct handshake_done_req *req); + +static inline void +handshake_done_req_set_status(struct handshake_done_req *req, __u32 status) +{ + req->_present.status = 1; + req->status = status; +} +static inline void +handshake_done_req_set_sockfd(struct handshake_done_req *req, __u32 sockfd) +{ + req->_present.sockfd = 1; + req->sockfd = sockfd; +} +static inline void +__handshake_done_req_set_remote_auth(struct handshake_done_req *req, + __u32 *remote_auth, + unsigned int n_remote_auth) +{ + free(req->remote_auth); + req->remote_auth = remote_auth; + req->n_remote_auth = n_remote_auth; +} + +/* + * Handler reports handshake completion + */ +int handshake_done(struct ynl_sock *ys, struct handshake_done_req *req); + +#endif /* _LINUX_HANDSHAKE_GEN_H */ diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c new file mode 100644 index 000000000000..4eb8aefef0cd --- /dev/null +++ b/tools/net/ynl/generated/netdev-user.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN user source */ + +#include <stdlib.h> +#include <string.h> +#include "netdev-user.h" +#include "ynl.h" +#include <linux/netdev.h> + +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> + +/* Enums */ +static const char * const netdev_op_strmap[] = { + [NETDEV_CMD_DEV_GET] = "dev-get", + [NETDEV_CMD_DEV_ADD_NTF] = "dev-add-ntf", + [NETDEV_CMD_DEV_DEL_NTF] = "dev-del-ntf", + [NETDEV_CMD_DEV_CHANGE_NTF] = "dev-change-ntf", +}; + +const char *netdev_op_str(int op) +{ + if (op < 0 || op >= (int)MNL_ARRAY_SIZE(netdev_op_strmap)) + return NULL; + return netdev_op_strmap[op]; +} + +static const char * const netdev_xdp_act_strmap[] = { + [0] = "basic", + [1] = "redirect", + [2] = "ndo-xmit", + [3] = "xsk-zerocopy", + [4] = "hw-offload", + [5] = "rx-sg", + [6] = "ndo-xmit-sg", +}; + +const char *netdev_xdp_act_str(enum netdev_xdp_act value) +{ + value = ffs(value) - 1; + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xdp_act_strmap)) + return NULL; + return netdev_xdp_act_strmap[value]; +} + +/* Policies */ +struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { + [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_DEV_PAD] = { .name = "pad", .type = YNL_PT_IGNORE, }, + [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, +}; + +struct ynl_policy_nest netdev_dev_nest = { + .max_attr = NETDEV_A_DEV_MAX, + .table = netdev_dev_policy, +}; + +/* Common nested types */ +/* ============== NETDEV_CMD_DEV_GET ============== */ +/* NETDEV_CMD_DEV_GET - do */ +void netdev_dev_get_req_free(struct netdev_dev_get_req *req) +{ + free(req); +} + +void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_dev_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_DEV_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_DEV_XDP_FEATURES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.xdp_features = 1; + dst->xdp_features = mnl_attr_get_u64(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_dev_get_rsp * +netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_dev_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1); + ys->req_policy = &netdev_dev_nest; + yrs.yarg.rsp_policy = &netdev_dev_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_DEV_IFINDEX, req->ifindex); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_dev_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_DEV_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_dev_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_DEV_GET - dump */ +void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp) +{ + struct netdev_dev_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_dev_get_list); + yds.cb = netdev_dev_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_DEV_GET; + yds.rsp_policy = &netdev_dev_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_DEV_GET, 1); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_dev_get_list_free(yds.first); + return NULL; +} + +/* NETDEV_CMD_DEV_GET - notify */ +void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp) +{ + free(rsp); +} + +static const struct ynl_ntf_info netdev_ntf_info[] = { + [NETDEV_CMD_DEV_ADD_NTF] = { + .alloc_sz = sizeof(struct netdev_dev_get_ntf), + .cb = netdev_dev_get_rsp_parse, + .policy = &netdev_dev_nest, + .free = (void *)netdev_dev_get_ntf_free, + }, + [NETDEV_CMD_DEV_DEL_NTF] = { + .alloc_sz = sizeof(struct netdev_dev_get_ntf), + .cb = netdev_dev_get_rsp_parse, + .policy = &netdev_dev_nest, + .free = (void *)netdev_dev_get_ntf_free, + }, + [NETDEV_CMD_DEV_CHANGE_NTF] = { + .alloc_sz = sizeof(struct netdev_dev_get_ntf), + .cb = netdev_dev_get_rsp_parse, + .policy = &netdev_dev_nest, + .free = (void *)netdev_dev_get_ntf_free, + }, +}; + +const struct ynl_family ynl_netdev_family = { + .name = "netdev", + .ntf_info = netdev_ntf_info, + .ntf_info_size = MNL_ARRAY_SIZE(netdev_ntf_info), +}; diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h new file mode 100644 index 000000000000..5554dc69bb9c --- /dev/null +++ b/tools/net/ynl/generated/netdev-user.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/netdev.yaml */ +/* YNL-GEN user header */ + +#ifndef _LINUX_NETDEV_GEN_H +#define _LINUX_NETDEV_GEN_H + +#include <stdlib.h> +#include <string.h> +#include <linux/types.h> +#include <linux/netdev.h> + +struct ynl_sock; + +extern const struct ynl_family ynl_netdev_family; + +/* Enums */ +const char *netdev_op_str(int op); +const char *netdev_xdp_act_str(enum netdev_xdp_act value); + +/* Common nested types */ +/* ============== NETDEV_CMD_DEV_GET ============== */ +/* NETDEV_CMD_DEV_GET - do */ +struct netdev_dev_get_req { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_dev_get_req *netdev_dev_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_dev_get_req)); +} +void netdev_dev_get_req_free(struct netdev_dev_get_req *req); + +static inline void +netdev_dev_get_req_set_ifindex(struct netdev_dev_get_req *req, __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_dev_get_rsp { + struct { + __u32 ifindex:1; + __u32 xdp_features:1; + } _present; + + __u32 ifindex; + __u64 xdp_features; +}; + +void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); + +/* + * Get / dump information about a netdev. + */ +struct netdev_dev_get_rsp * +netdev_dev_get(struct ynl_sock *ys, struct netdev_dev_get_req *req); + +/* NETDEV_CMD_DEV_GET - dump */ +struct netdev_dev_get_list { + struct netdev_dev_get_list *next; + struct netdev_dev_get_rsp obj __attribute__ ((aligned (8))); +}; + +void netdev_dev_get_list_free(struct netdev_dev_get_list *rsp); + +struct netdev_dev_get_list *netdev_dev_get_dump(struct ynl_sock *ys); + +/* NETDEV_CMD_DEV_GET - notify */ +struct netdev_dev_get_ntf { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct netdev_dev_get_ntf *ntf); + struct netdev_dev_get_rsp obj __attribute__ ((aligned (8))); +}; + +void netdev_dev_get_ntf_free(struct netdev_dev_get_ntf *rsp); + +#endif /* _LINUX_NETDEV_GEN_H */ diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile new file mode 100644 index 000000000000..d2e50fd0a52d --- /dev/null +++ b/tools/net/ynl/lib/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 + +CC=gcc +CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow +ifeq ("$(DEBUG)","1") + CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan +endif + +SRCS=$(wildcard *.c) +OBJS=$(patsubst %.c,%.o,${SRCS}) + +include $(wildcard *.d) + +all: ynl.a + +ynl.a: $(OBJS) + ar rcs $@ $(OBJS) +clean: + rm -f *.o *.d *~ + +hardclean: clean + rm -f *.a + +%.o: %.c + $(COMPILE.c) -MMD -c -o $@ $< + +.PHONY: all clean +.DEFAULT_GOAL=all diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index a0241add3839..c5d4a6d476a0 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -226,11 +226,15 @@ class SpecStructMember(SpecElement): Represents a single struct member attribute. Attributes: - type string, type of the member attribute + type string, type of the member attribute + byte_order string or None for native byte order + enum string, name of the enum definition """ def __init__(self, family, yaml): super().__init__(family, yaml) self.type = yaml['type'] + self.byte_order = yaml.get('byte-order') + self.enum = yaml.get('enum') class SpecStruct(SpecElement): @@ -320,12 +324,13 @@ class SpecFamily(SpecElement): Attributes: proto protocol type (e.g. genetlink) + msg_id_model enum-model for operations (unified, directional etc.) license spec license (loaded from an SPDX tag on the spec) attr_sets dict of attribute sets msgs dict of all messages (index by name) - msgs_by_value dict of all messages (indexed by name) ops dict of all valid requests / responses + ntfs dict of all async events consts dict of all constants/enums fixed_header string, optional name of family default fixed header struct """ @@ -345,6 +350,7 @@ class SpecFamily(SpecElement): super().__init__(self, spec) self.proto = self.yaml.get('protocol', 'genetlink') + self.msg_id_model = self.yaml['operations'].get('enum-model', 'unified') if schema_path is None: schema_path = os.path.dirname(os.path.dirname(spec_path)) + f'/{self.proto}.yaml' @@ -364,6 +370,7 @@ class SpecFamily(SpecElement): self.req_by_value = collections.OrderedDict() self.rsp_by_value = collections.OrderedDict() self.ops = collections.OrderedDict() + self.ntfs = collections.OrderedDict() self.consts = collections.OrderedDict() last_exception = None @@ -416,7 +423,7 @@ class SpecFamily(SpecElement): self.fixed_header = self.yaml['operations'].get('fixed-header') req_val = rsp_val = 1 for elem in self.yaml['operations']['list']: - if 'notify' in elem: + if 'notify' in elem or 'event' in elem: if 'value' in elem: rsp_val = elem['value'] req_val_next = req_val @@ -438,6 +445,10 @@ class SpecFamily(SpecElement): else: raise Exception("Can't parse directional ops") + if req_val == req_val_next: + req_val = None + if rsp_val == rsp_val_next: + rsp_val = None op = self.new_operation(elem, req_val, rsp_val) req_val = req_val_next rsp_val = rsp_val_next @@ -469,10 +480,9 @@ class SpecFamily(SpecElement): attr_set = self.new_attr_set(elem) self.attr_sets[elem['name']] = attr_set - msg_id_model = self.yaml['operations'].get('enum-model', 'unified') - if msg_id_model == 'unified': + if self.msg_id_model == 'unified': self._dictify_ops_unified() - elif msg_id_model == 'directional': + elif self.msg_id_model == 'directional': self._dictify_ops_directional() for op in self.msgs.values(): @@ -482,3 +492,5 @@ class SpecFamily(SpecElement): self.rsp_by_value[op.rsp_value] = op if not op.is_async and 'attribute-set' in op: self.ops[op.name] = op + elif op.is_async: + self.ntfs[op.name] = op diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c new file mode 100644 index 000000000000..514e0d69e731 --- /dev/null +++ b/tools/net/ynl/lib/ynl.c @@ -0,0 +1,901 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#include <errno.h> +#include <poll.h> +#include <string.h> +#include <stdlib.h> +#include <linux/types.h> + +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> + +#include "ynl.h" + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*arr)) + +#define __yerr_msg(yse, _msg...) \ + ({ \ + struct ynl_error *_yse = (yse); \ + \ + if (_yse) { \ + snprintf(_yse->msg, sizeof(_yse->msg) - 1, _msg); \ + _yse->msg[sizeof(_yse->msg) - 1] = 0; \ + } \ + }) + +#define __yerr_code(yse, _code...) \ + ({ \ + struct ynl_error *_yse = (yse); \ + \ + if (_yse) { \ + _yse->code = _code; \ + } \ + }) + +#define __yerr(yse, _code, _msg...) \ + ({ \ + __yerr_msg(yse, _msg); \ + __yerr_code(yse, _code); \ + }) + +#define __perr(yse, _msg) __yerr(yse, errno, _msg) + +#define yerr_msg(_ys, _msg...) __yerr_msg(&(_ys)->err, _msg) +#define yerr(_ys, _code, _msg...) __yerr(&(_ys)->err, _code, _msg) +#define perr(_ys, _msg) __yerr(&(_ys)->err, errno, _msg) + +/* -- Netlink boiler plate */ +static int +ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type, + char *str, int str_sz, int *n) +{ + if (!policy) { + if (*n < str_sz) + *n += snprintf(str, str_sz, "!policy"); + return 1; + } + + if (type > policy->max_attr) { + if (*n < str_sz) + *n += snprintf(str, str_sz, "!oob"); + return 1; + } + + if (!policy->table[type].name) { + if (*n < str_sz) + *n += snprintf(str, str_sz, "!name"); + return 1; + } + + if (*n < str_sz) + *n += snprintf(str, str_sz - *n, + ".%s", policy->table[type].name); + return 0; +} + +static int +ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, + struct ynl_policy_nest *policy, char *str, int str_sz, + struct ynl_policy_nest **nest_pol) +{ + unsigned int astart_off, aend_off; + const struct nlattr *attr; + unsigned int data_len; + unsigned int type; + bool found = false; + int n = 0; + + if (!policy) { + if (n < str_sz) + n += snprintf(str, str_sz, "!policy"); + return n; + } + + data_len = end - start; + + mnl_attr_for_each_payload(start, data_len) { + astart_off = (char *)attr - (char *)start; + aend_off = astart_off + mnl_attr_get_payload_len(attr); + if (aend_off <= off) + continue; + + found = true; + break; + } + if (!found) + return 0; + + off -= astart_off; + + type = mnl_attr_get_type(attr); + + if (ynl_err_walk_report_one(policy, type, str, str_sz, &n)) + return n; + + if (!off) { + if (nest_pol) + *nest_pol = policy->table[type].nest; + return n; + } + + if (!policy->table[type].nest) { + if (n < str_sz) + n += snprintf(str, str_sz, "!nest"); + return n; + } + + off -= sizeof(struct nlattr); + start = mnl_attr_get_payload(attr); + end = start + mnl_attr_get_payload_len(attr); + + return n + ynl_err_walk(ys, start, end, off, policy->table[type].nest, + &str[n], str_sz - n, nest_pol); +} + +#define NLMSGERR_ATTR_MISS_TYPE (NLMSGERR_ATTR_POLICY + 1) +#define NLMSGERR_ATTR_MISS_NEST (NLMSGERR_ATTR_POLICY + 2) +#define NLMSGERR_ATTR_MAX (NLMSGERR_ATTR_MAX + 2) + +static int +ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, + unsigned int hlen) +{ + const struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {}; + char miss_attr[sizeof(ys->err.msg)]; + char bad_attr[sizeof(ys->err.msg)]; + const struct nlattr *attr; + const char *str = NULL; + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return MNL_CB_OK; + + mnl_attr_for_each(attr, nlh, hlen) { + unsigned int len, type; + + len = mnl_attr_get_payload_len(attr); + type = mnl_attr_get_type(attr); + + if (type > NLMSGERR_ATTR_MAX) + continue; + + tb[type] = attr; + + switch (type) { + case NLMSGERR_ATTR_OFFS: + case NLMSGERR_ATTR_MISS_TYPE: + case NLMSGERR_ATTR_MISS_NEST: + if (len != sizeof(__u32)) + return MNL_CB_ERROR; + break; + case NLMSGERR_ATTR_MSG: + str = mnl_attr_get_payload(attr); + if (str[len - 1]) + return MNL_CB_ERROR; + break; + default: + break; + } + } + + bad_attr[0] = '\0'; + miss_attr[0] = '\0'; + + if (tb[NLMSGERR_ATTR_OFFS]) { + unsigned int n, off; + void *start, *end; + + ys->err.attr_offs = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]); + + n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ", + str ? " (" : ""); + + start = mnl_nlmsg_get_payload_offset(ys->nlh, + sizeof(struct genlmsghdr)); + end = mnl_nlmsg_get_payload_tail(ys->nlh); + + off = ys->err.attr_offs; + off -= sizeof(struct nlmsghdr); + off -= sizeof(struct genlmsghdr); + + n += ynl_err_walk(ys, start, end, off, ys->req_policy, + &bad_attr[n], sizeof(bad_attr) - n, NULL); + + if (n >= sizeof(bad_attr)) + n = sizeof(bad_attr) - 1; + bad_attr[n] = '\0'; + } + if (tb[NLMSGERR_ATTR_MISS_TYPE]) { + struct ynl_policy_nest *nest_pol = NULL; + unsigned int n, off, type; + void *start, *end; + int n2; + + type = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_TYPE]); + + n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ", + bad_attr[0] ? ", " : (str ? " (" : "")); + + start = mnl_nlmsg_get_payload_offset(ys->nlh, + sizeof(struct genlmsghdr)); + end = mnl_nlmsg_get_payload_tail(ys->nlh); + + nest_pol = ys->req_policy; + if (tb[NLMSGERR_ATTR_MISS_NEST]) { + off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]); + off -= sizeof(struct nlmsghdr); + off -= sizeof(struct genlmsghdr); + + n += ynl_err_walk(ys, start, end, off, ys->req_policy, + &miss_attr[n], sizeof(miss_attr) - n, + &nest_pol); + } + + n2 = 0; + ynl_err_walk_report_one(nest_pol, type, &miss_attr[n], + sizeof(miss_attr) - n, &n2); + n += n2; + + if (n >= sizeof(miss_attr)) + n = sizeof(miss_attr) - 1; + miss_attr[n] = '\0'; + } + + /* Implicitly depend on ys->err.code already set */ + if (str) + yerr_msg(ys, "Kernel %s: '%s'%s%s%s", + ys->err.code ? "error" : "warning", + str, bad_attr, miss_attr, + bad_attr[0] || miss_attr[0] ? ")" : ""); + else if (bad_attr[0] || miss_attr[0]) + yerr_msg(ys, "Kernel %s: %s%s", + ys->err.code ? "error" : "warning", + bad_attr, miss_attr); + + return MNL_CB_OK; +} + +static int ynl_cb_error(const struct nlmsghdr *nlh, void *data) +{ + const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); + struct ynl_parse_arg *yarg = data; + unsigned int hlen; + int code; + + code = err->error >= 0 ? err->error : -err->error; + yarg->ys->err.code = code; + errno = code; + + hlen = sizeof(*err); + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + hlen += mnl_nlmsg_get_payload_len(&err->msg); + + ynl_ext_ack_check(yarg->ys, nlh, hlen); + + return code ? MNL_CB_ERROR : MNL_CB_STOP; +} + +static int ynl_cb_done(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + int err; + + err = *(int *)NLMSG_DATA(nlh); + if (err < 0) { + yarg->ys->err.code = -err; + errno = -err; + + ynl_ext_ack_check(yarg->ys, nlh, sizeof(int)); + + return MNL_CB_ERROR; + } + return MNL_CB_STOP; +} + +static int ynl_cb_noop(const struct nlmsghdr *nlh, void *data) +{ + return MNL_CB_OK; +} + +mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE] = { + [NLMSG_NOOP] = ynl_cb_noop, + [NLMSG_ERROR] = ynl_cb_error, + [NLMSG_DONE] = ynl_cb_done, + [NLMSG_OVERRUN] = ynl_cb_noop, +}; + +/* Attribute validation */ + +int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) +{ + struct ynl_policy_attr *policy; + unsigned int type, len; + unsigned char *data; + + data = mnl_attr_get_payload(attr); + len = mnl_attr_get_payload_len(attr); + type = mnl_attr_get_type(attr); + if (type > yarg->rsp_policy->max_attr) { + yerr(yarg->ys, YNL_ERROR_INTERNAL, + "Internal error, validating unknown attribute"); + return -1; + } + + policy = &yarg->rsp_policy->table[type]; + + switch (policy->type) { + case YNL_PT_REJECT: + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Rejected attribute (%s)", policy->name); + return -1; + case YNL_PT_IGNORE: + break; + case YNL_PT_U8: + if (len == sizeof(__u8)) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (u8 %s)", policy->name); + return -1; + case YNL_PT_U16: + if (len == sizeof(__u16)) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (u16 %s)", policy->name); + return -1; + case YNL_PT_U32: + if (len == sizeof(__u32)) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (u32 %s)", policy->name); + return -1; + case YNL_PT_U64: + if (len == sizeof(__u64)) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (u64 %s)", policy->name); + return -1; + case YNL_PT_FLAG: + /* Let flags grow into real attrs, why not.. */ + break; + case YNL_PT_NEST: + if (!len || len >= sizeof(*attr)) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (nest %s)", policy->name); + return -1; + case YNL_PT_BINARY: + if (!policy->len || len == policy->len) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (binary %s)", policy->name); + return -1; + case YNL_PT_NUL_STR: + if ((!policy->len || len <= policy->len) && !data[len - 1]) + break; + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (string %s)", policy->name); + return -1; + default: + yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, + "Invalid attribute (unknown %s)", policy->name); + return -1; + } + + return 0; +} + +/* Generic code */ + +static void ynl_err_reset(struct ynl_sock *ys) +{ + ys->err.code = 0; + ys->err.attr_offs = 0; + ys->err.msg[0] = 0; +} + +struct nlmsghdr *ynl_msg_start(struct ynl_sock *ys, __u32 id, __u16 flags) +{ + struct nlmsghdr *nlh; + + ynl_err_reset(ys); + + nlh = ys->nlh = mnl_nlmsg_put_header(ys->tx_buf); + nlh->nlmsg_type = id; + nlh->nlmsg_flags = flags; + nlh->nlmsg_seq = ++ys->seq; + + return nlh; +} + +struct nlmsghdr * +ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags, + __u8 cmd, __u8 version) +{ + struct genlmsghdr gehdr; + struct nlmsghdr *nlh; + void *data; + + nlh = ynl_msg_start(ys, id, flags); + + memset(&gehdr, 0, sizeof(gehdr)); + gehdr.cmd = cmd; + gehdr.version = version; + + data = mnl_nlmsg_put_extra_header(nlh, sizeof(gehdr)); + memcpy(data, &gehdr, sizeof(gehdr)); + + return nlh; +} + +void ynl_msg_start_req(struct ynl_sock *ys, __u32 id) +{ + ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK); +} + +void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id) +{ + ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP); +} + +struct nlmsghdr * +ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) +{ + return ynl_gemsg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK, cmd, version); +} + +struct nlmsghdr * +ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version) +{ + return ynl_gemsg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + cmd, version); +} + +int ynl_recv_ack(struct ynl_sock *ys, int ret) +{ + if (!ret) { + yerr(ys, YNL_ERROR_EXPECT_ACK, + "Expecting an ACK but nothing received"); + return -1; + } + + ret = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perr(ys, "Socket receive failed"); + return ret; + } + return mnl_cb_run(ys->rx_buf, ret, ys->seq, ys->portid, + ynl_cb_null, ys); +} + +int ynl_cb_null(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + + yerr(yarg->ys, YNL_ERROR_UNEXPECT_MSG, + "Received a message when none were expected"); + + return MNL_CB_ERROR; +} + +/* Init/fini and genetlink boiler plate */ +static int +ynl_get_family_info_mcast(struct ynl_sock *ys, const struct nlattr *mcasts) +{ + const struct nlattr *entry, *attr; + unsigned int i; + + mnl_attr_for_each_nested(attr, mcasts) + ys->n_mcast_groups++; + + if (!ys->n_mcast_groups) + return 0; + + ys->mcast_groups = calloc(ys->n_mcast_groups, + sizeof(*ys->mcast_groups)); + if (!ys->mcast_groups) + return MNL_CB_ERROR; + + i = 0; + mnl_attr_for_each_nested(entry, mcasts) { + mnl_attr_for_each_nested(attr, entry) { + if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_ID) + ys->mcast_groups[i].id = mnl_attr_get_u32(attr); + if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GRP_NAME) { + strncpy(ys->mcast_groups[i].name, + mnl_attr_get_str(attr), + GENL_NAMSIZ - 1); + ys->mcast_groups[i].name[GENL_NAMSIZ - 1] = 0; + } + } + } + + return 0; +} + +static int ynl_get_family_info_cb(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct ynl_sock *ys = yarg->ys; + const struct nlattr *attr; + bool found_id = true; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + if (mnl_attr_get_type(attr) == CTRL_ATTR_MCAST_GROUPS) + if (ynl_get_family_info_mcast(ys, attr)) + return MNL_CB_ERROR; + + if (mnl_attr_get_type(attr) != CTRL_ATTR_FAMILY_ID) + continue; + + if (mnl_attr_get_payload_len(attr) != sizeof(__u16)) { + yerr(ys, YNL_ERROR_ATTR_INVALID, "Invalid family ID"); + return MNL_CB_ERROR; + } + + ys->family_id = mnl_attr_get_u16(attr); + found_id = true; + } + + if (!found_id) { + yerr(ys, YNL_ERROR_ATTR_MISSING, "Family ID missing"); + return MNL_CB_ERROR; + } + return MNL_CB_OK; +} + +static int ynl_sock_read_family(struct ynl_sock *ys, const char *family_name) +{ + struct ynl_parse_arg yarg = { .ys = ys, }; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 1); + mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name); + + err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len); + if (err < 0) { + perr(ys, "failed to request socket family info"); + return err; + } + + err = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE); + if (err <= 0) { + perr(ys, "failed to receive the socket family info"); + return err; + } + err = mnl_cb_run2(ys->rx_buf, err, ys->seq, ys->portid, + ynl_get_family_info_cb, &yarg, + ynl_cb_array, ARRAY_SIZE(ynl_cb_array)); + if (err < 0) { + free(ys->mcast_groups); + perr(ys, "failed to receive the socket family info - no such family?"); + return err; + } + + return ynl_recv_ack(ys, err); +} + +struct ynl_sock * +ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) +{ + struct ynl_sock *ys; + int one = 1; + + ys = malloc(sizeof(*ys) + 2 * MNL_SOCKET_BUFFER_SIZE); + if (!ys) + return NULL; + memset(ys, 0, sizeof(*ys)); + + ys->family = yf; + ys->tx_buf = &ys->raw_buf[0]; + ys->rx_buf = &ys->raw_buf[MNL_SOCKET_BUFFER_SIZE]; + ys->ntf_last_next = &ys->ntf_first; + + ys->sock = mnl_socket_open(NETLINK_GENERIC); + if (!ys->sock) { + __perr(yse, "failed to create a netlink socket"); + goto err_free_sock; + } + + if (mnl_socket_setsockopt(ys->sock, NETLINK_CAP_ACK, + &one, sizeof(one))) { + __perr(yse, "failed to enable netlink ACK"); + goto err_close_sock; + } + if (mnl_socket_setsockopt(ys->sock, NETLINK_EXT_ACK, + &one, sizeof(one))) { + __perr(yse, "failed to enable netlink ext ACK"); + goto err_close_sock; + } + + ys->seq = random(); + ys->portid = mnl_socket_get_portid(ys->sock); + + if (ynl_sock_read_family(ys, yf->name)) { + if (yse) + memcpy(yse, &ys->err, sizeof(*yse)); + goto err_close_sock; + } + + return ys; + +err_close_sock: + mnl_socket_close(ys->sock); +err_free_sock: + free(ys); + return NULL; +} + +void ynl_sock_destroy(struct ynl_sock *ys) +{ + struct ynl_ntf_base_type *ntf; + + mnl_socket_close(ys->sock); + while ((ntf = ynl_ntf_dequeue(ys))) + ynl_ntf_free(ntf); + free(ys->mcast_groups); + free(ys); +} + +/* YNL multicast handling */ + +void ynl_ntf_free(struct ynl_ntf_base_type *ntf) +{ + ntf->free(ntf); +} + +int ynl_subscribe(struct ynl_sock *ys, const char *grp_name) +{ + unsigned int i; + int err; + + for (i = 0; i < ys->n_mcast_groups; i++) + if (!strcmp(ys->mcast_groups[i].name, grp_name)) + break; + if (i == ys->n_mcast_groups) { + yerr(ys, ENOENT, "Multicast group '%s' not found", grp_name); + return -1; + } + + err = mnl_socket_setsockopt(ys->sock, NETLINK_ADD_MEMBERSHIP, + &ys->mcast_groups[i].id, + sizeof(ys->mcast_groups[i].id)); + if (err < 0) { + perr(ys, "Subscribing to multicast group failed"); + return -1; + } + + return 0; +} + +int ynl_socket_get_fd(struct ynl_sock *ys) +{ + return mnl_socket_get_fd(ys->sock); +} + +struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys) +{ + struct ynl_ntf_base_type *ntf; + + if (!ynl_has_ntf(ys)) + return NULL; + + ntf = ys->ntf_first; + ys->ntf_first = ntf->next; + if (ys->ntf_last_next == &ntf->next) + ys->ntf_last_next = &ys->ntf_first; + + return ntf; +} + +static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh) +{ + struct ynl_parse_arg yarg = { .ys = ys, }; + const struct ynl_ntf_info *info; + struct ynl_ntf_base_type *rsp; + struct genlmsghdr *gehdr; + int ret; + + gehdr = mnl_nlmsg_get_payload(nlh); + if (gehdr->cmd >= ys->family->ntf_info_size) + return MNL_CB_ERROR; + info = &ys->family->ntf_info[gehdr->cmd]; + if (!info->cb) + return MNL_CB_ERROR; + + rsp = calloc(1, info->alloc_sz); + rsp->free = info->free; + yarg.data = rsp->data; + yarg.rsp_policy = info->policy; + + ret = info->cb(nlh, &yarg); + if (ret <= MNL_CB_STOP) + goto err_free; + + rsp->family = nlh->nlmsg_type; + rsp->cmd = gehdr->cmd; + + *ys->ntf_last_next = rsp; + ys->ntf_last_next = &rsp->next; + + return MNL_CB_OK; + +err_free: + info->free(rsp); + return MNL_CB_ERROR; +} + +static int ynl_ntf_trampoline(const struct nlmsghdr *nlh, void *data) +{ + return ynl_ntf_parse((struct ynl_sock *)data, nlh); +} + +int ynl_ntf_check(struct ynl_sock *ys) +{ + ssize_t len; + int err; + + do { + /* libmnl doesn't let us pass flags to the recv to make + * it non-blocking so we need to poll() or peek() :| + */ + struct pollfd pfd = { }; + + pfd.fd = mnl_socket_get_fd(ys->sock); + pfd.events = POLLIN; + err = poll(&pfd, 1, 1); + if (err < 1) + return err; + + len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, + MNL_SOCKET_BUFFER_SIZE); + if (len < 0) + return len; + + err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, + ynl_ntf_trampoline, ys, + ynl_cb_array, NLMSG_MIN_TYPE); + if (err < 0) + return err; + } while (err > 0); + + return 0; +} + +/* YNL specific helpers used by the auto-generated code */ + +struct ynl_dump_list_type *YNL_LIST_END = (void *)(0xb4d123); + +void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd) +{ + yerr(ys, YNL_ERROR_UNKNOWN_NTF, + "Unknown notification message type '%d'", cmd); +} + +int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg) +{ + yerr(yarg->ys, YNL_ERROR_INV_RESP, "Error parsing response: %s", msg); + return MNL_CB_ERROR; +} + +static int +ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd) +{ + struct genlmsghdr *gehdr; + + if (mnl_nlmsg_get_payload_len(nlh) < sizeof(*gehdr)) { + yerr(ys, YNL_ERROR_INV_RESP, + "Kernel responded with truncated message"); + return -1; + } + + gehdr = mnl_nlmsg_get_payload(nlh); + if (gehdr->cmd != rsp_cmd) + return ynl_ntf_parse(ys, nlh); + + return 0; +} + +static int ynl_req_trampoline(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_req_state *yrs = data; + int ret; + + ret = ynl_check_alien(yrs->yarg.ys, nlh, yrs->rsp_cmd); + if (ret) + return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK; + + return yrs->cb(nlh, &yrs->yarg); +} + +int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_req_state *yrs) +{ + ssize_t len; + int err; + + err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len); + if (err < 0) + return err; + + do { + len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, + MNL_SOCKET_BUFFER_SIZE); + if (len < 0) + return len; + + err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, + ynl_req_trampoline, yrs, + ynl_cb_array, NLMSG_MIN_TYPE); + if (err < 0) + return err; + } while (err > 0); + + return 0; +} + +static int ynl_dump_trampoline(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_dump_state *ds = data; + struct ynl_dump_list_type *obj; + struct ynl_parse_arg yarg = {}; + int ret; + + ret = ynl_check_alien(ds->ys, nlh, ds->rsp_cmd); + if (ret) + return ret < 0 ? MNL_CB_ERROR : MNL_CB_OK; + + obj = calloc(1, ds->alloc_sz); + if (!obj) + return MNL_CB_ERROR; + + if (!ds->first) + ds->first = obj; + if (ds->last) + ds->last->next = obj; + ds->last = obj; + + yarg.ys = ds->ys; + yarg.rsp_policy = ds->rsp_policy; + yarg.data = &obj->data; + + return ds->cb(nlh, &yarg); +} + +static void *ynl_dump_end(struct ynl_dump_state *ds) +{ + if (!ds->first) + return YNL_LIST_END; + + ds->last->next = YNL_LIST_END; + return ds->first; +} + +int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_dump_state *yds) +{ + ssize_t len; + int err; + + err = mnl_socket_sendto(ys->sock, req_nlh, req_nlh->nlmsg_len); + if (err < 0) + return err; + + do { + len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, + MNL_SOCKET_BUFFER_SIZE); + if (len < 0) + goto err_close_list; + + err = mnl_cb_run2(ys->rx_buf, len, ys->seq, ys->portid, + ynl_dump_trampoline, yds, + ynl_cb_array, NLMSG_MIN_TYPE); + if (err < 0) + goto err_close_list; + } while (err > 0); + + yds->first = ynl_dump_end(yds); + return 0; + +err_close_list: + yds->first = ynl_dump_end(yds); + return -1; +} diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h new file mode 100644 index 000000000000..9eafa3552c16 --- /dev/null +++ b/tools/net/ynl/lib/ynl.h @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +#ifndef __YNL_C_H +#define __YNL_C_H 1 + +#include <stddef.h> +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> +#include <linux/types.h> + +struct mnl_socket; +struct nlmsghdr; + +/* + * User facing code + */ + +struct ynl_ntf_base_type; +struct ynl_ntf_info; +struct ynl_sock; + +enum ynl_error_code { + YNL_ERROR_NONE = 0, + __YNL_ERRNO_END = 4096, + YNL_ERROR_INTERNAL, + YNL_ERROR_EXPECT_ACK, + YNL_ERROR_EXPECT_MSG, + YNL_ERROR_UNEXPECT_MSG, + YNL_ERROR_ATTR_MISSING, + YNL_ERROR_ATTR_INVALID, + YNL_ERROR_UNKNOWN_NTF, + YNL_ERROR_INV_RESP, +}; + +/** + * struct ynl_error - error encountered by YNL + * @code: errno (low values) or YNL error code (enum ynl_error_code) + * @attr_offs: offset of bad attribute (for very advanced users) + * @msg: error message + * + * Error information for when YNL operations fail. + * Users should interact with the err member of struct ynl_sock directly. + * The main exception to that rule is ynl_sock_create(). + */ +struct ynl_error { + enum ynl_error_code code; + unsigned int attr_offs; + char msg[512]; +}; + +/** + * struct ynl_family - YNL family info + * Family description generated by codegen. Pass to ynl_sock_create(). + */ +struct ynl_family { +/* private: */ + const char *name; + const struct ynl_ntf_info *ntf_info; + unsigned int ntf_info_size; +}; + +/** + * struct ynl_sock - YNL wrapped netlink socket + * @err: YNL error descriptor, cleared on every request. + */ +struct ynl_sock { + struct ynl_error err; + +/* private: */ + const struct ynl_family *family; + struct mnl_socket *sock; + __u32 seq; + __u32 portid; + __u16 family_id; + + unsigned int n_mcast_groups; + struct { + unsigned int id; + char name[GENL_NAMSIZ]; + } *mcast_groups; + + struct ynl_ntf_base_type *ntf_first; + struct ynl_ntf_base_type **ntf_last_next; + + struct nlmsghdr *nlh; + struct ynl_policy_nest *req_policy; + unsigned char *tx_buf; + unsigned char *rx_buf; + unsigned char raw_buf[]; +}; + +struct ynl_sock * +ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e); +void ynl_sock_destroy(struct ynl_sock *ys); + +#define ynl_dump_foreach(dump, iter) \ + for (typeof(dump->obj) *iter = &dump->obj; \ + !ynl_dump_obj_is_last(iter); \ + iter = ynl_dump_obj_next(iter)) + +int ynl_subscribe(struct ynl_sock *ys, const char *grp_name); +int ynl_socket_get_fd(struct ynl_sock *ys); +int ynl_ntf_check(struct ynl_sock *ys); + +/** + * ynl_has_ntf() - check if socket has *parsed* notifications + * @ys: active YNL socket + * + * Note that this does not take into account notifications sitting + * in netlink socket, just the notifications which have already been + * read and parsed (e.g. during a ynl_ntf_check() call). + */ +static inline bool ynl_has_ntf(struct ynl_sock *ys) +{ + return ys->ntf_last_next != &ys->ntf_first; +} +struct ynl_ntf_base_type *ynl_ntf_dequeue(struct ynl_sock *ys); + +void ynl_ntf_free(struct ynl_ntf_base_type *ntf); + +/* + * YNL internals / low level stuff + */ + +/* Generic mnl helper code */ + +enum ynl_policy_type { + YNL_PT_REJECT = 1, + YNL_PT_IGNORE, + YNL_PT_NEST, + YNL_PT_FLAG, + YNL_PT_BINARY, + YNL_PT_U8, + YNL_PT_U16, + YNL_PT_U32, + YNL_PT_U64, + YNL_PT_NUL_STR, +}; + +struct ynl_policy_attr { + enum ynl_policy_type type; + unsigned int len; + const char *name; + struct ynl_policy_nest *nest; +}; + +struct ynl_policy_nest { + unsigned int max_attr; + struct ynl_policy_attr *table; +}; + +struct ynl_parse_arg { + struct ynl_sock *ys; + struct ynl_policy_nest *rsp_policy; + void *data; +}; + +struct ynl_dump_list_type { + struct ynl_dump_list_type *next; + unsigned char data[] __attribute__ ((aligned (8))); +}; +extern struct ynl_dump_list_type *YNL_LIST_END; + +static inline bool ynl_dump_obj_is_last(void *obj) +{ + unsigned long uptr = (unsigned long)obj; + + uptr -= offsetof(struct ynl_dump_list_type, data); + return uptr == (unsigned long)YNL_LIST_END; +} + +static inline void *ynl_dump_obj_next(void *obj) +{ + unsigned long uptr = (unsigned long)obj; + struct ynl_dump_list_type *list; + + uptr -= offsetof(struct ynl_dump_list_type, data); + list = (void *)uptr; + uptr = (unsigned long)list->next; + uptr += offsetof(struct ynl_dump_list_type, data); + + return (void *)uptr; +} + +struct ynl_ntf_base_type { + __u16 family; + __u8 cmd; + struct ynl_ntf_base_type *next; + void (*free)(struct ynl_ntf_base_type *ntf); + unsigned char data[] __attribute__ ((aligned (8))); +}; + +extern mnl_cb_t ynl_cb_array[NLMSG_MIN_TYPE]; + +struct nlmsghdr * +ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); +struct nlmsghdr * +ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version); + +int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr); + +int ynl_recv_ack(struct ynl_sock *ys, int ret); +int ynl_cb_null(const struct nlmsghdr *nlh, void *data); + +/* YNL specific helpers used by the auto-generated code */ + +struct ynl_req_state { + struct ynl_parse_arg yarg; + mnl_cb_t cb; + __u32 rsp_cmd; +}; + +struct ynl_dump_state { + struct ynl_sock *ys; + struct ynl_policy_nest *rsp_policy; + void *first; + struct ynl_dump_list_type *last; + size_t alloc_sz; + mnl_cb_t cb; + __u32 rsp_cmd; +}; + +struct ynl_ntf_info { + struct ynl_policy_nest *policy; + mnl_cb_t cb; + size_t alloc_sz; + void (*free)(struct ynl_ntf_base_type *ntf); +}; + +int ynl_exec(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_req_state *yrs); +int ynl_exec_dump(struct ynl_sock *ys, struct nlmsghdr *req_nlh, + struct ynl_dump_state *yds); + +void ynl_error_unknown_notification(struct ynl_sock *ys, __u8 cmd); +int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); + +#endif diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index aa77bcae4807..3b343d6cbbc0 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -1,10 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +from collections import namedtuple import functools import os import random import socket import struct +from struct import Struct import yaml from .nlspec import SpecFamily @@ -76,10 +78,17 @@ class NlError(Exception): class NlAttr: - type_formats = { 'u8' : ('B', 1), 's8' : ('b', 1), - 'u16': ('H', 2), 's16': ('h', 2), - 'u32': ('I', 4), 's32': ('i', 4), - 'u64': ('Q', 8), 's64': ('q', 8) } + ScalarFormat = namedtuple('ScalarFormat', ['native', 'big', 'little']) + type_formats = { + 'u8' : ScalarFormat(Struct('B'), Struct("B"), Struct("B")), + 's8' : ScalarFormat(Struct('b'), Struct("b"), Struct("b")), + 'u16': ScalarFormat(Struct('H'), Struct(">H"), Struct("<H")), + 's16': ScalarFormat(Struct('h'), Struct(">h"), Struct("<h")), + 'u32': ScalarFormat(Struct('I'), Struct(">I"), Struct("<I")), + 's32': ScalarFormat(Struct('i'), Struct(">i"), Struct("<i")), + 'u64': ScalarFormat(Struct('Q'), Struct(">Q"), Struct("<Q")), + 's64': ScalarFormat(Struct('q'), Struct(">q"), Struct("<q")) + } def __init__(self, raw, offset): self._len, self._type = struct.unpack("HH", raw[offset:offset + 4]) @@ -88,25 +97,17 @@ class NlAttr: self.full_len = (self.payload_len + 3) & ~3 self.raw = raw[offset + 4:offset + self.payload_len] - def format_byte_order(byte_order): + @classmethod + def get_format(cls, attr_type, byte_order=None): + format = cls.type_formats[attr_type] if byte_order: - return ">" if byte_order == "big-endian" else "<" - return "" + return format.big if byte_order == "big-endian" \ + else format.little + return format.native - def as_u8(self): - return struct.unpack("B", self.raw)[0] - - def as_u16(self, byte_order=None): - endian = NlAttr.format_byte_order(byte_order) - return struct.unpack(f"{endian}H", self.raw)[0] - - def as_u32(self, byte_order=None): - endian = NlAttr.format_byte_order(byte_order) - return struct.unpack(f"{endian}I", self.raw)[0] - - def as_u64(self, byte_order=None): - endian = NlAttr.format_byte_order(byte_order) - return struct.unpack(f"{endian}Q", self.raw)[0] + def as_scalar(self, attr_type, byte_order=None): + format = self.get_format(attr_type, byte_order) + return format.unpack(self.raw)[0] def as_strz(self): return self.raw.decode('ascii')[:-1] @@ -115,17 +116,17 @@ class NlAttr: return self.raw def as_c_array(self, type): - format, _ = self.type_formats[type] - return list({ x[0] for x in struct.iter_unpack(format, self.raw) }) + format = self.get_format(type) + return [ x[0] for x in format.iter_unpack(self.raw) ] def as_struct(self, members): value = dict() offset = 0 for m in members: # TODO: handle non-scalar members - format, size = self.type_formats[m.type] - decoded = struct.unpack_from(format, self.raw, offset) - offset += size + format = self.get_format(m.type, m.byte_order) + decoded = format.unpack_from(self.raw, offset) + offset += format.size value[m.name] = decoded[0] return value @@ -184,11 +185,11 @@ class NlMsg: if extack.type == Netlink.NLMSGERR_ATTR_MSG: self.extack['msg'] = extack.as_strz() elif extack.type == Netlink.NLMSGERR_ATTR_MISS_TYPE: - self.extack['miss-type'] = extack.as_u32() + self.extack['miss-type'] = extack.as_scalar('u32') elif extack.type == Netlink.NLMSGERR_ATTR_MISS_NEST: - self.extack['miss-nest'] = extack.as_u32() + self.extack['miss-nest'] = extack.as_scalar('u32') elif extack.type == Netlink.NLMSGERR_ATTR_OFFS: - self.extack['bad-attr-offs'] = extack.as_u32() + self.extack['bad-attr-offs'] = extack.as_scalar('u32') else: if 'unknown' not in self.extack: self.extack['unknown'] = [] @@ -272,11 +273,11 @@ def _genl_load_families(): fam = dict() for attr in gm.raw_attrs: if attr.type == Netlink.CTRL_ATTR_FAMILY_ID: - fam['id'] = attr.as_u16() + fam['id'] = attr.as_scalar('u16') elif attr.type == Netlink.CTRL_ATTR_FAMILY_NAME: fam['name'] = attr.as_strz() elif attr.type == Netlink.CTRL_ATTR_MAXATTR: - fam['maxattr'] = attr.as_u32() + fam['maxattr'] = attr.as_scalar('u32') elif attr.type == Netlink.CTRL_ATTR_MCAST_GROUPS: fam['mcast'] = dict() for entry in NlAttrs(attr.raw): @@ -286,7 +287,7 @@ def _genl_load_families(): if entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_NAME: mcast_name = entry_attr.as_strz() elif entry_attr.type == Netlink.CTRL_ATTR_MCAST_GRP_ID: - mcast_id = entry_attr.as_u32() + mcast_id = entry_attr.as_scalar('u32') if mcast_name and mcast_id is not None: fam['mcast'][mcast_name] = mcast_id if 'name' in fam and 'id' in fam: @@ -304,9 +305,9 @@ class GenlMsg: self.fixed_header_attrs = dict() for m in fixed_header_members: - format, size = NlAttr.type_formats[m.type] - decoded = struct.unpack_from(format, nl_msg.raw, offset) - offset += size + format = NlAttr.get_format(m.type, m.byte_order) + decoded = format.unpack_from(nl_msg.raw, offset) + offset += format.size self.fixed_header_attrs[m.name] = decoded[0] self.raw = nl_msg.raw[offset:] @@ -381,21 +382,13 @@ class YnlFamily(SpecFamily): attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue) elif attr["type"] == 'flag': attr_payload = b'' - elif attr["type"] == 'u8': - attr_payload = struct.pack("B", int(value)) - elif attr["type"] == 'u16': - endian = NlAttr.format_byte_order(attr.byte_order) - attr_payload = struct.pack(f"{endian}H", int(value)) - elif attr["type"] == 'u32': - endian = NlAttr.format_byte_order(attr.byte_order) - attr_payload = struct.pack(f"{endian}I", int(value)) - elif attr["type"] == 'u64': - endian = NlAttr.format_byte_order(attr.byte_order) - attr_payload = struct.pack(f"{endian}Q", int(value)) elif attr["type"] == 'string': attr_payload = str(value).encode('ascii') + b'\x00' elif attr["type"] == 'binary': attr_payload = value + elif attr['type'] in NlAttr.type_formats: + format = NlAttr.get_format(attr['type'], attr.byte_order) + attr_payload = format.pack(int(value)) else: raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}') @@ -419,7 +412,11 @@ class YnlFamily(SpecFamily): def _decode_binary(self, attr, attr_spec): if attr_spec.struct_name: - decoded = attr.as_struct(self.consts[attr_spec.struct_name]) + members = self.consts[attr_spec.struct_name] + decoded = attr.as_struct(members) + for m in members: + if m.enum: + self._decode_enum(decoded, m) elif attr_spec.sub_type: decoded = attr.as_c_array(attr_spec.sub_type) else: @@ -434,22 +431,16 @@ class YnlFamily(SpecFamily): if attr_spec["type"] == 'nest': subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes']) decoded = subdict - elif attr_spec['type'] == 'u8': - decoded = attr.as_u8() - elif attr_spec['type'] == 'u16': - decoded = attr.as_u16(attr_spec.byte_order) - elif attr_spec['type'] == 'u32': - decoded = attr.as_u32(attr_spec.byte_order) - elif attr_spec['type'] == 'u64': - decoded = attr.as_u64(attr_spec.byte_order) elif attr_spec["type"] == 'string': decoded = attr.as_strz() elif attr_spec["type"] == 'binary': decoded = self._decode_binary(attr, attr_spec) elif attr_spec["type"] == 'flag': decoded = True + elif attr_spec["type"] in NlAttr.type_formats: + decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) else: - raise Exception(f'Unknown {attr.type} {attr_spec["name"]} {attr_spec["type"]}') + raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') if not attr_spec.is_multi: rsp[attr_spec['name']] = decoded @@ -554,9 +545,9 @@ class YnlFamily(SpecFamily): if op.fixed_header: fixed_header_members = self.consts[op.fixed_header].members for m in fixed_header_members: - value = vals.pop(m.name) - format, _ = NlAttr.type_formats[m.type] - msg += struct.pack(format, value) + value = vals.pop(m.name) if m.name in vals else 0 + format = NlAttr.get_format(m.type, m.byte_order) + msg += format.pack(value) for name, value in vals.items(): msg += self._add_attr(op.attr_set.name, name, value) msg = _genl_msg_finalize(msg) @@ -591,8 +582,9 @@ class YnlFamily(SpecFamily): print('Unexpected message: ' + repr(gm)) continue - rsp.append(self._decode(gm.raw_attrs, op.attr_set.name) - | gm.fixed_header_attrs) + rsp_msg = self._decode(gm.raw_attrs, op.attr_set.name) + rsp_msg.update(gm.fixed_header_attrs) + rsp.append(rsp_msg) if not rsp: return None diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore new file mode 100644 index 000000000000..a24678b67557 --- /dev/null +++ b/tools/net/ynl/samples/.gitignore @@ -0,0 +1,2 @@ +devlink +netdev diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile new file mode 100644 index 000000000000..714316cad45f --- /dev/null +++ b/tools/net/ynl/samples/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0 + +CC=gcc +CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ + -I../lib/ -I../generated/ +ifeq ("$(DEBUG)","1") + CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan +endif + +LDLIBS=-lmnl ../lib/ynl.a ../generated/protos.a + +SRCS=$(wildcard *.c) +BINS=$(patsubst %.c,%,${SRCS}) + +include $(wildcard *.d) + +all: $(BINS) + +$(BINS): ../lib/ynl.a ../generated/protos.a + +clean: + rm -f *.o *.d *~ + +hardclean: clean + rm -f $(BINS) + +.PHONY: all clean +.DEFAULT_GOAL=all diff --git a/tools/net/ynl/samples/devlink.c b/tools/net/ynl/samples/devlink.c new file mode 100644 index 000000000000..d2611d7ebab4 --- /dev/null +++ b/tools/net/ynl/samples/devlink.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> + +#include <ynl.h> + +#include "devlink-user.h" + +int main(int argc, char **argv) +{ + struct devlink_get_list *devs; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_devlink_family, NULL); + if (!ys) + return 1; + + devs = devlink_get_dump(ys); + if (!devs) + goto err_close; + + ynl_dump_foreach(devs, d) { + struct devlink_info_get_req *info_req; + struct devlink_info_get_rsp *info_rsp; + + printf("%s/%s:\n", d->bus_name, d->dev_name); + + info_req = devlink_info_get_req_alloc(); + devlink_info_get_req_set_bus_name(info_req, d->bus_name); + devlink_info_get_req_set_dev_name(info_req, d->dev_name); + + info_rsp = devlink_info_get(ys, info_req); + devlink_info_get_req_free(info_req); + if (!info_rsp) + goto err_free_devs; + + if (info_rsp->_present.info_driver_name_len) + printf(" driver: %s\n", info_rsp->info_driver_name); + if (info_rsp->n_info_version_running) + printf(" running fw:\n"); + for (unsigned i = 0; i < info_rsp->n_info_version_running; i++) + printf(" %s: %s\n", + info_rsp->info_version_running[i].info_version_name, + info_rsp->info_version_running[i].info_version_value); + printf(" ...\n"); + devlink_info_get_rsp_free(info_rsp); + } + devlink_get_list_free(devs); + + ynl_sock_destroy(ys); + + return 0; + +err_free_devs: + devlink_get_list_free(devs); +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c new file mode 100644 index 000000000000..d31268aa47c5 --- /dev/null +++ b/tools/net/ynl/samples/netdev.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> + +#include <ynl.h> + +#include <net/if.h> + +#include "netdev-user.h" + +/* netdev genetlink family code sample + * This sample shows off basics of the netdev family but also notification + * handling, hence the somewhat odd UI. We subscribe to notifications first + * then wait for ifc selection, so the socket may already accumulate + * notifications as we wait. This allows us to test that YNL can handle + * requests and notifications getting interleaved. + */ + +static void netdev_print_device(struct netdev_dev_get_rsp *d, unsigned int op) +{ + char ifname[IF_NAMESIZE]; + const char *name; + + if (!d->_present.ifindex) + return; + + name = if_indextoname(d->ifindex, ifname); + if (name) + printf("%8s", name); + printf("[%d]\t", d->ifindex); + + if (!d->_present.xdp_features) + return; + + printf("%llx:", d->xdp_features); + for (int i = 0; d->xdp_features > 1U << i; i++) { + if (d->xdp_features & (1U << i)) + printf(" %s", netdev_xdp_act_str(1 << i)); + } + + name = netdev_op_str(op); + if (name) + printf(" (ntf: %s)", name); + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct netdev_dev_get_list *devs; + struct ynl_ntf_base_type *ntf; + struct ynl_error yerr; + struct ynl_sock *ys; + int ifindex = 0; + + if (argc > 1) + ifindex = strtol(argv[1], NULL, 0); + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + if (ynl_subscribe(ys, "mgmt")) + goto err_close; + + printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): "); + scanf("%d", &ifindex); + + if (ifindex > 0) { + struct netdev_dev_get_req *req; + struct netdev_dev_get_rsp *d; + + req = netdev_dev_get_req_alloc(); + netdev_dev_get_req_set_ifindex(req, ifindex); + + d = netdev_dev_get(ys, req); + netdev_dev_get_req_free(req); + if (!d) + goto err_close; + + netdev_print_device(d, 0); + netdev_dev_get_rsp_free(d); + } else if (!ifindex) { + devs = netdev_dev_get_dump(ys); + if (!devs) + goto err_close; + + ynl_dump_foreach(devs, d) + netdev_print_device(d, 0); + netdev_dev_get_list_free(devs); + } else if (ifindex == -2) { + ynl_ntf_check(ys); + } + while ((ntf = ynl_ntf_dequeue(ys))) { + netdev_print_device((struct netdev_dev_get_rsp *)&ntf->data, + ntf->cmd); + ynl_ntf_free(ntf); + } + + ynl_sock_destroy(ys); + return 0; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index cc2f8c945340..7b051c00cfc3 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -94,7 +94,10 @@ class Type(SpecAttr): def arg_member(self, ri): member = self._complex_member_type(ri) if member: - return [member + ' *' + self.c_name] + arg = [member + ' *' + self.c_name] + if self.presence_type() == 'count': + arg += ['unsigned int n_' + self.c_name] + return arg raise Exception(f"Struct member not implemented for class type {self.type}") def struct_member(self, ri): @@ -150,7 +153,7 @@ class Type(SpecAttr): init_lines = [init_lines] kw = 'if' if first else 'else if' - ri.cw.block_start(line=f"{kw} (mnl_attr_get_type(attr) == {self.enum_name})") + ri.cw.block_start(line=f"{kw} (type == {self.enum_name})") if local_vars: for local in local_vars: ri.cw.p(local) @@ -170,6 +173,7 @@ class Type(SpecAttr): for line in lines: ri.cw.p(line) ri.cw.block_end() + return True def _setter_lines(self, ri, member, presence): raise Exception(f"Setter not implemented for class type {self.type}") @@ -187,9 +191,12 @@ class Type(SpecAttr): code.append(presence + ' = 1;') code += self._setter_lines(ri, member, presence) - ri.cw.write_func('static inline void', - f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}", - body=code, + func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" + free = bool([x for x in code if 'free(' in x]) + alloc = bool([x for x in code if 'alloc(' in x]) + if free and not alloc: + func_name = '__' + func_name + ri.cw.write_func('static inline void', func_name, body=code, args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) @@ -197,6 +204,12 @@ class TypeUnused(Type): def presence_type(self): return '' + def arg_member(self, ri): + return [] + + def _attr_get(self, ri, var): + return ['return MNL_CB_ERROR;'], None, None + def _attr_typol(self): return '.type = YNL_PT_REJECT, ' @@ -208,12 +221,24 @@ class TypePad(Type): def presence_type(self): return '' + def arg_member(self, ri): + return [] + def _attr_typol(self): - return '.type = YNL_PT_REJECT, ' + return '.type = YNL_PT_IGNORE, ' + + def attr_put(self, ri, var): + pass + + def attr_get(self, ri, var, first): + pass def attr_policy(self, cw): pass + def setter(self, ri, space, direction, deref=False, ref=None): + pass + class TypeScalar(Type): def __init__(self, family, attr_set, attr, value): @@ -411,7 +436,8 @@ class TypeNest(Type): f"{self.enum_name}, &{var}->{self.c_name})") def _attr_get(self, ri, var): - get_lines = [f"{self.nested_render_name}_parse(&parg, attr);"] + get_lines = [f"if ({self.nested_render_name}_parse(&parg, attr))", + "return MNL_CB_ERROR;"] init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;", f"parg.data = &{var}->{self.c_name};"] return get_lines, init_lines, None @@ -430,6 +456,13 @@ class TypeMultiAttr(Type): def presence_type(self): return 'count' + def _mnl_type(self): + t = self.type + # mnl does not have a helper for signed types + if t[0] == 's': + t = 'u' + t[1:] + return t + def _complex_member_type(self, ri): if 'type' not in self.attr or self.attr['type'] == 'nest': return f"struct {self.nested_render_name}" @@ -443,9 +476,14 @@ class TypeMultiAttr(Type): return 'type' not in self.attr or self.attr['type'] == 'nest' def free(self, ri, var, ref): - if 'type' not in self.attr or self.attr['type'] == 'nest': + if self.attr['type'] in scalars: + ri.cw.p(f"free({var}->{ref}{self.c_name});") + elif 'type' not in self.attr or self.attr['type'] == 'nest': ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') + ri.cw.p(f"free({var}->{ref}{self.c_name});") + else: + raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet") def _attr_typol(self): if 'type' not in self.attr or self.attr['type'] == 'nest': @@ -456,7 +494,26 @@ class TypeMultiAttr(Type): raise Exception(f"Sub-type {self.attr['type']} not supported yet") def _attr_get(self, ri, var): - return f'{var}->n_{self.c_name}++;', None, None + return f'n_{self.c_name}++;', None, None + + def attr_put(self, ri, var): + if self.attr['type'] in scalars: + put_type = self._mnl_type() + ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"mnl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") + elif 'type' not in self.attr or self.attr['type'] == 'nest': + ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + + f"{self.enum_name}, &{var}->{self.c_name}[i])") + else: + raise Exception(f"Put of MultiAttr sub-type {self.attr['type']} not supported yet") + + def _setter_lines(self, ri, member, presence): + # For multi-attr we have a count, not presence, hack up the presence + presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name + return [f"free({member});", + f"{member} = {self.c_name};", + f"{presence} = n_{self.c_name};"] class TypeArrayNest(Type): @@ -658,14 +715,13 @@ class Operation(SpecOperation): def __init__(self, family, yaml, req_value, rsp_value): super().__init__(family, yaml, req_value, rsp_value) - if req_value != rsp_value: - raise Exception("Directional messages not supported by codegen") - self.render_name = family.name + '_' + c_lower(self.name) self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \ ('dump' in yaml and 'request' in yaml['dump']) + self.has_ntf = False + # Added by resolve: self.enum_name = None delattr(self, "enum_name") @@ -678,12 +734,8 @@ class Operation(SpecOperation): else: self.enum_name = self.family.async_op_prefix + c_upper(self.name) - def add_notification(self, op): - if 'notify' not in self.yaml: - self.yaml['notify'] = dict() - self.yaml['notify']['reply'] = self.yaml['do']['reply'] - self.yaml['notify']['cmds'] = [] - self.yaml['notify']['cmds'].append(op) + def mark_has_ntf(self): + self.has_ntf = True class Family(SpecFamily): @@ -745,14 +797,12 @@ class Family(SpecFamily): self.root_sets = dict() # dict space-name -> set('request', 'reply') self.pure_nested_structs = dict() - self.all_notify = dict() + self._mark_notify() self._mock_up_events() - self._dictify() self._load_root_sets() self._load_nested_sets() - self._load_all_notify() self._load_hooks() self.kernel_policy = self.yaml.get('kernel-policy', 'split') @@ -768,6 +818,11 @@ class Family(SpecFamily): def new_operation(self, elem, req_value, rsp_value): return Operation(self, elem, req_value, rsp_value) + def _mark_notify(self): + for op in self.msgs.values(): + if 'notify' in op: + self.ops[op['notify']].mark_has_ntf() + # Fake a 'do' equivalent of all events, so that we can render their response parsing def _mock_up_events(self): for op in self.yaml['operations']['list']: @@ -778,16 +833,8 @@ class Family(SpecFamily): } } - def _dictify(self): - ntf = [] - for msg in self.msgs.values(): - if 'notify' in msg: - ntf.append(msg) - for n in ntf: - self.ops[n['notify']].add_notification(n) - def _load_root_sets(self): - for op_name, op in self.ops.items(): + for op_name, op in self.msgs.items(): if 'attribute-set' not in op: continue @@ -798,6 +845,8 @@ class Family(SpecFamily): req_attrs.update(set(op[op_mode]['request']['attributes'])) if op_mode in op and 'reply' in op[op_mode]: rsp_attrs.update(set(op[op_mode]['reply']['attributes'])) + if 'event' in op: + rsp_attrs.update(set(op['event']['attributes'])) if op['attribute-set'] not in self.root_sets: self.root_sets[op['attribute-set']] = {'request': req_attrs, 'reply': rsp_attrs} @@ -806,33 +855,73 @@ class Family(SpecFamily): self.root_sets[op['attribute-set']]['reply'].update(rsp_attrs) def _load_nested_sets(self): + attr_set_queue = list(self.root_sets.keys()) + attr_set_seen = set(self.root_sets.keys()) + + while len(attr_set_queue): + a_set = attr_set_queue.pop(0) + for attr, spec in self.attr_sets[a_set].items(): + if 'nested-attributes' not in spec: + continue + + nested = spec['nested-attributes'] + if nested not in attr_set_seen: + attr_set_queue.append(nested) + attr_set_seen.add(nested) + + inherit = set() + if nested not in self.root_sets: + if nested not in self.pure_nested_structs: + self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit) + else: + raise Exception(f'Using attr set as root and nested not supported - {nested}') + + if 'type-value' in spec: + if nested in self.root_sets: + raise Exception("Inheriting members to a space used as root not supported") + inherit.update(set(spec['type-value'])) + elif spec['type'] == 'array-nest': + inherit.add('idx') + self.pure_nested_structs[nested].set_inherited(inherit) + for root_set, rs_members in self.root_sets.items(): for attr, spec in self.attr_sets[root_set].items(): if 'nested-attributes' in spec: - inherit = set() nested = spec['nested-attributes'] - if nested not in self.root_sets: - self.pure_nested_structs[nested] = Struct(self, nested, inherited=inherit) if attr in rs_members['request']: self.pure_nested_structs[nested].request = True if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True - if 'type-value' in spec: - if nested in self.root_sets: - raise Exception("Inheriting members to a space used as root not supported") - inherit.update(set(spec['type-value'])) - elif spec['type'] == 'array-nest': - inherit.add('idx') - self.pure_nested_structs[nested].set_inherited(inherit) - - def _load_all_notify(self): - for op_name, op in self.ops.items(): - if not op: - continue - - if 'notify' in op: - self.all_notify[op_name] = op['notify']['cmds'] + # Try to reorder according to dependencies + pns_key_list = list(self.pure_nested_structs.keys()) + pns_key_seen = set() + rounds = len(pns_key_list)**2 # it's basically bubble sort + for _ in range(rounds): + if len(pns_key_list) == 0: + break + name = pns_key_list.pop(0) + finished = True + for _, spec in self.attr_sets[name].items(): + if 'nested-attributes' in spec: + if spec['nested-attributes'] not in pns_key_seen: + # Dicts are sorted, this will make struct last + struct = self.pure_nested_structs.pop(name) + self.pure_nested_structs[name] = struct + finished = False + break + if finished: + pns_key_seen.add(name) + else: + pns_key_list.append(name) + # Propagate the request / reply + for attr_set, struct in reversed(self.pure_nested_structs.items()): + for _, spec in self.attr_sets[attr_set].items(): + if 'nested-attributes' in spec: + child = self.pure_nested_structs.get(spec['nested-attributes']) + if child: + child.request |= struct.request + child.reply |= struct.reply def _load_global_policy(self): global_set = set() @@ -874,33 +963,35 @@ class Family(SpecFamily): class RenderInfo: - def __init__(self, cw, family, ku_space, op, op_name, op_mode, attr_set=None): + def __init__(self, cw, family, ku_space, op, op_mode, attr_set=None): self.family = family self.nl = cw.nlib self.ku_space = ku_space - self.op = op - self.op_name = op_name self.op_mode = op_mode + self.op = op # 'do' and 'dump' response parsing is identical - if op_mode != 'do' and 'dump' in op and 'do' in op and 'reply' in op['do'] and \ - op["do"]["reply"] == op["dump"]["reply"]: - self.type_consistent = True - else: - self.type_consistent = op_mode == 'event' + self.type_consistent = True + if op_mode != 'do' and 'dump' in op and 'do' in op: + if ('reply' in op['do']) != ('reply' in op["dump"]): + self.type_consistent = False + elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]: + self.type_consistent = False self.attr_set = attr_set if not self.attr_set: self.attr_set = op['attribute-set'] if op: - self.type_name = c_lower(op_name) + self.type_name = c_lower(op.name) else: self.type_name = c_lower(attr_set) self.cw = cw self.struct = dict() + if op_mode == 'notify': + op_mode = 'do' for op_dir in ['request', 'reply']: if op and op_dir in op[op_mode]: self.struct[op_dir] = Struct(family, self.attr_set, @@ -914,6 +1005,7 @@ class CodeWriter: self.nlib = nlib self._nl = False + self._block_end = False self._silent_block = False self._ind = 0 self._out = out_file @@ -923,9 +1015,17 @@ class CodeWriter: return line.startswith('if') or line.startswith('while') or line.startswith('for') def p(self, line, add_ind=0): + if self._block_end: + self._block_end = False + if line.startswith('else'): + line = '} ' + line + else: + self._out.write('\t' * self._ind + '}\n') + if self._nl: self._out.write('\n') self._nl = False + ind = self._ind if line[-1] == ':': ind -= 1 @@ -949,7 +1049,14 @@ class CodeWriter: if line and line[0] not in {';', ','}: line = ' ' + line self._ind -= 1 - self.p('}' + line) + self._nl = False + if not line: + # Delay printing closing bracket in case "else" comes next + if self._block_end: + self._out.write('\t' * (self._ind + 1) + '}\n') + self._block_end = True + else: + self.p('}' + line) def write_doc_line(self, doc, indent=True): words = doc.split() @@ -1068,7 +1175,40 @@ op_mode_to_wrapper = { } _C_KW = { - 'do' + 'auto', + 'bool', + 'break', + 'case', + 'char', + 'const', + 'continue', + 'default', + 'do', + 'double', + 'else', + 'enum', + 'extern', + 'float', + 'for', + 'goto', + 'if', + 'inline', + 'int', + 'long', + 'register', + 'return', + 'short', + 'signed', + 'sizeof', + 'static', + 'struct', + 'switch', + 'typedef', + 'union', + 'unsigned', + 'void', + 'volatile', + 'while' } @@ -1131,10 +1271,6 @@ def print_dump_prototype(ri): print_prototype(ri, "request") -def put_typol_fwd(cw, struct): - cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;') - - def put_typol(cw, struct): type_max = struct.attr_set.max_name cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') @@ -1152,6 +1288,58 @@ def put_typol(cw, struct): cw.nl() +def _put_enum_to_str_helper(cw, render_name, map_name, arg_name, enum=None): + args = [f'int {arg_name}'] + if enum and not ('enum-name' in enum and not enum['enum-name']): + args = [f'enum {render_name} {arg_name}'] + cw.write_func_prot('const char *', f'{render_name}_str', args) + cw.block_start() + if enum and enum.type == 'flags': + cw.p(f'{arg_name} = ffs({arg_name}) - 1;') + cw.p(f'if ({arg_name} < 0 || {arg_name} >= (int)MNL_ARRAY_SIZE({map_name}))') + cw.p('return NULL;') + cw.p(f'return {map_name}[{arg_name}];') + cw.block_end() + cw.nl() + + +def put_op_name_fwd(family, cw): + cw.write_func_prot('const char *', f'{family.name}_op_str', ['int op'], suffix=';') + + +def put_op_name(family, cw): + map_name = f'{family.name}_op_strmap' + cw.block_start(line=f"static const char * const {map_name}[] =") + for op_name, op in family.msgs.items(): + if op.rsp_value: + if op.req_value == op.rsp_value: + cw.p(f'[{op.enum_name}] = "{op_name}",') + else: + cw.p(f'[{op.rsp_value}] = "{op_name}",') + cw.block_end(line=';') + cw.nl() + + _put_enum_to_str_helper(cw, family.name + '_op', map_name, 'op') + + +def put_enum_to_str_fwd(family, cw, enum): + args = [f'enum {enum.render_name} value'] + if 'enum-name' in enum and not enum['enum-name']: + args = ['int value'] + cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';') + + +def put_enum_to_str(family, cw, enum): + map_name = f'{enum.render_name}_strmap' + cw.block_start(line=f"static const char * const {map_name}[] =") + for entry in enum.entries.values(): + cw.p(f'[{entry.value}] = "{entry.name}",') + cw.block_end(line=';') + cw.nl() + + _put_enum_to_str_helper(cw, enum.render_name, map_name, 'value', enum=enum) + + def put_req_nested(ri, struct): func_args = ['struct nlmsghdr *nlh', 'unsigned int attr_type', @@ -1196,6 +1384,11 @@ def _multi_parse(ri, struct, init_lines, local_vars): local_vars.append('struct ynl_parse_arg parg;') init_lines.append('parg.ys = yarg->ys;') + all_multi = array_nests | multi_attrs + + for anest in sorted(all_multi): + local_vars.append(f"unsigned int n_{struct[anest].c_name} = 0;") + ri.cw.block_start() ri.cw.write_func_lvar(local_vars) @@ -1206,13 +1399,21 @@ def _multi_parse(ri, struct, init_lines, local_vars): for arg in struct.inherited: ri.cw.p(f'dst->{arg} = {arg};') + for anest in sorted(all_multi): + aspec = struct[anest] + ri.cw.p(f"if (dst->{aspec.c_name})") + ri.cw.p(f'return ynl_error_parse(yarg, "attribute already present ({struct.attr_set.name}.{aspec.name})");') + ri.cw.nl() ri.cw.block_start(line=iter_line) + ri.cw.p('unsigned int type = mnl_attr_get_type(attr);') + ri.cw.nl() first = True for _, arg in struct.member_list(): - arg.attr_get(ri, 'dst', first=first) - first = False + good = arg.attr_get(ri, 'dst', first=first) + # First may be 'unused' or 'pad', ignore those + first &= not good ri.cw.block_end() ri.cw.nl() @@ -1220,8 +1421,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): for anest in sorted(array_nests): aspec = struct[anest] - ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})") - ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.block_start(line=f"if (n_{aspec.c_name})") + ri.cw.p(f"dst->{aspec.c_name} = calloc({aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") ri.cw.block_start(line=f"mnl_attr_for_each_nested(attr, attr_{aspec.c_name})") @@ -1235,8 +1437,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): for anest in sorted(multi_attrs): aspec = struct[anest] - ri.cw.block_start(line=f"if (dst->n_{aspec.c_name})") - ri.cw.p(f"dst->{aspec.c_name} = calloc(dst->n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.block_start(line=f"if (n_{aspec.c_name})") + ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));") + ri.cw.p(f"dst->n_{aspec.c_name} = n_{aspec.c_name};") ri.cw.p('i = 0;') if 'nested-attributes' in aspec: ri.cw.p(f"parg.rsp_policy = &{aspec.nested_render_name}_nest;") @@ -1304,13 +1507,13 @@ def print_req(ri): ret_err = '-1' direction = "request" local_vars = ['struct nlmsghdr *nlh;', - 'int len, err;'] + 'int err;'] if 'reply' in ri.op[ri.op_mode]: ret_ok = 'rsp' ret_err = 'NULL' local_vars += [f'{type_name(ri, rdir(direction))} *rsp;', - 'struct ynl_parse_arg yarg = { .ys = ys, };'] + 'struct ynl_req_state yrs = { .yarg = { .ys = ys, }, };'] print_prototype(ri, direction, terminate=False) ri.cw.block_start() @@ -1320,41 +1523,39 @@ def print_req(ri): ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;") if 'reply' in ri.op[ri.op_mode]: - ri.cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") + ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") ri.cw.nl() for _, attr in ri.struct["request"].member_list(): attr.attr_put(ri, "req") ri.cw.nl() - ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);') - ri.cw.p('if (err < 0)') - ri.cw.p(f"return {ret_err};") - ri.cw.nl() - ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);') - ri.cw.p('if (len < 0)') - ri.cw.p(f"return {ret_err};") - ri.cw.nl() - + parse_arg = "NULL" if 'reply' in ri.op[ri.op_mode]: ri.cw.p('rsp = calloc(1, sizeof(*rsp));') - ri.cw.p('yarg.data = rsp;') + ri.cw.p('yrs.yarg.data = rsp;') + ri.cw.p(f"yrs.cb = {op_prefix(ri, 'reply')}_parse;") + if ri.op.value is not None: + ri.cw.p(f'yrs.rsp_cmd = {ri.op.enum_name};') + else: + ri.cw.p(f'yrs.rsp_cmd = {ri.op.rsp_value};') ri.cw.nl() - ri.cw.p(f"err = {ri.nl.parse_cb_run(op_prefix(ri, 'reply') + '_parse', '&yarg', False)};") - ri.cw.p('if (err < 0)') + parse_arg = '&yrs' + ri.cw.p(f"err = ynl_exec(ys, nlh, {parse_arg});") + ri.cw.p('if (err < 0)') + if 'reply' in ri.op[ri.op_mode]: ri.cw.p('goto err_free;') - ri.cw.nl() - - ri.cw.p('err = ynl_recv_ack(ys, err);') - ri.cw.p('if (err)') - ri.cw.p('goto err_free;') + else: + ri.cw.p('return -1;') ri.cw.nl() + ri.cw.p(f"return {ret_ok};") ri.cw.nl() - ri.cw.p('err_free:') if 'reply' in ri.op[ri.op_mode]: + ri.cw.p('err_free:') ri.cw.p(f"{call_free(ri, rdir(direction), 'rsp')}") - ri.cw.p(f"return {ret_err};") + ri.cw.p(f"return {ret_err};") + ri.cw.block_end() @@ -1364,7 +1565,7 @@ def print_dump(ri): ri.cw.block_start() local_vars = ['struct ynl_dump_state yds = {};', 'struct nlmsghdr *nlh;', - 'int len, err;'] + 'int err;'] for var in local_vars: ri.cw.p(f'{var}') @@ -1373,6 +1574,10 @@ def print_dump(ri): ri.cw.p('yds.ys = ys;') ri.cw.p(f"yds.alloc_sz = sizeof({type_name(ri, rdir(direction))});") ri.cw.p(f"yds.cb = {op_prefix(ri, 'reply', deref=True)}_parse;") + if ri.op.value is not None: + ri.cw.p(f'yds.rsp_cmd = {ri.op.enum_name};') + else: + ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};') ri.cw.p(f"yds.rsp_policy = &{ri.struct['reply'].render_name}_nest;") ri.cw.nl() ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);") @@ -1384,20 +1589,9 @@ def print_dump(ri): attr.attr_put(ri, "req") ri.cw.nl() - ri.cw.p('err = mnl_socket_sendto(ys->sock, nlh, nlh->nlmsg_len);') - ri.cw.p('if (err < 0)') - ri.cw.p('return NULL;') - ri.cw.nl() - - ri.cw.block_start(line='do') - ri.cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);') - ri.cw.p('if (len < 0)') - ri.cw.p('goto free_list;') - ri.cw.nl() - ri.cw.p(f"err = {ri.nl.parse_cb_run('ynl_dump_trampoline', '&yds', False, indent=2)};") + ri.cw.p('err = ynl_exec_dump(ys, nlh, &yds);') ri.cw.p('if (err < 0)') ri.cw.p('goto free_list;') - ri.cw.block_end(line='while (err > 0);') ri.cw.nl() ri.cw.p('return yds.first;') @@ -1418,6 +1612,14 @@ def free_arg_name(direction): return 'obj' +def print_alloc_wrapper(ri, direction): + name = op_prefix(ri, direction) + ri.cw.write_func_prot(f'static inline struct {name} *', f"{name}_alloc", [f"void"]) + ri.cw.block_start() + ri.cw.p(f'return calloc(1, sizeof(struct {name}));') + ri.cw.block_end() + + def print_free_prototype(ri, direction, suffix=';'): name = op_prefix(ri, direction) arg = free_arg_name(direction) @@ -1465,6 +1667,7 @@ def print_type_full(ri, struct): def print_type_helpers(ri, direction, deref=False): print_free_prototype(ri, direction) + ri.cw.nl() if ri.ku_space == 'user' and direction == 'request': for _, attr in ri.struct[direction].member_list(): @@ -1473,6 +1676,7 @@ def print_type_helpers(ri, direction, deref=False): def print_req_type_helpers(ri): + print_alloc_wrapper(ri, "request") print_type_helpers(ri, "request") @@ -1496,6 +1700,12 @@ def print_req_type(ri): print_type(ri, "request") +def print_req_free(ri): + if 'request' not in ri.op[ri.op_mode]: + return + _free_type(ri, 'request', ri.struct['request']) + + def print_rsp_type(ri): if (ri.op_mode == 'do' or ri.op_mode == 'dump') and 'reply' in ri.op[ri.op_mode]: direction = 'reply' @@ -1513,6 +1723,7 @@ def print_wrapped_type(ri): elif ri.op_mode == 'notify' or ri.op_mode == 'event': ri.cw.p('__u16 family;') ri.cw.p('__u8 cmd;') + ri.cw.p('struct ynl_ntf_base_type *next;') ri.cw.p(f"void (*free)({type_name(ri, 'reply')} *ntf);") ri.cw.p(f"{type_name(ri, 'reply', deref=True)} obj __attribute__ ((aligned (8)));") ri.cw.block_end(line=';') @@ -1564,7 +1775,7 @@ def print_dump_type_free(ri): ri.cw.block_start() ri.cw.p(f"{sub_type} *next = rsp;") ri.cw.nl() - ri.cw.block_start(line='while (next)') + ri.cw.block_start(line='while ((void *)next != YNL_LIST_END)') _free_type_members_iter(ri, ri.struct['reply']) ri.cw.p('rsp = next;') ri.cw.p('next = rsp->next;') @@ -1587,70 +1798,6 @@ def print_ntf_type_free(ri): ri.cw.nl() -def print_ntf_parse_prototype(family, cw, suffix=';'): - cw.write_func_prot('struct ynl_ntf_base_type *', f"{family['name']}_ntf_parse", - ['struct ynl_sock *ys'], suffix=suffix) - - -def print_ntf_type_parse(family, cw, ku_mode): - print_ntf_parse_prototype(family, cw, suffix='') - cw.block_start() - cw.write_func_lvar(['struct genlmsghdr *genlh;', - 'struct nlmsghdr *nlh;', - 'struct ynl_parse_arg yarg = { .ys = ys, };', - 'struct ynl_ntf_base_type *rsp;', - 'int len, err;', - 'mnl_cb_t parse;']) - cw.p('len = mnl_socket_recvfrom(ys->sock, ys->rx_buf, MNL_SOCKET_BUFFER_SIZE);') - cw.p('if (len < (ssize_t)(sizeof(*nlh) + sizeof(*genlh)))') - cw.p('return NULL;') - cw.nl() - cw.p('nlh = (struct nlmsghdr *)ys->rx_buf;') - cw.p('genlh = mnl_nlmsg_get_payload(nlh);') - cw.nl() - cw.block_start(line='switch (genlh->cmd)') - for ntf_op in sorted(family.all_notify.keys()): - op = family.ops[ntf_op] - ri = RenderInfo(cw, family, ku_mode, op, ntf_op, "notify") - for ntf in op['notify']['cmds']: - cw.p(f"case {ntf.enum_name}:") - cw.p(f"rsp = calloc(1, sizeof({type_name(ri, 'notify')}));") - cw.p(f"parse = {op_prefix(ri, 'reply', deref=True)}_parse;") - cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") - cw.p(f"rsp->free = (void *){op_prefix(ri, 'notify')}_free;") - cw.p('break;') - for op_name, op in family.ops.items(): - if 'event' not in op: - continue - ri = RenderInfo(cw, family, ku_mode, op, op_name, "event") - cw.p(f"case {op.enum_name}:") - cw.p(f"rsp = calloc(1, sizeof({type_name(ri, 'event')}));") - cw.p(f"parse = {op_prefix(ri, 'reply', deref=True)}_parse;") - cw.p(f"yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;") - cw.p(f"rsp->free = (void *){op_prefix(ri, 'notify')}_free;") - cw.p('break;') - cw.p('default:') - cw.p('ynl_error_unknown_notification(ys, genlh->cmd);') - cw.p('return NULL;') - cw.block_end() - cw.nl() - cw.p('yarg.data = rsp->data;') - cw.nl() - cw.p(f"err = {cw.nlib.parse_cb_run('parse', '&yarg', True)};") - cw.p('if (err < 0)') - cw.p('goto err_free;') - cw.nl() - cw.p('rsp->family = nlh->nlmsg_type;') - cw.p('rsp->cmd = genlh->cmd;') - cw.p('return rsp;') - cw.nl() - cw.p('err_free:') - cw.p('free(rsp);') - cw.p('return NULL;') - cw.block_end() - cw.nl() - - def print_req_policy_fwd(cw, struct, ri=None, terminate=True): if terminate and ri and kernel_can_gen_family_struct(struct.family): return @@ -2035,6 +2182,48 @@ def render_uapi(family, cw): cw.p(f'#endif /* {hdr_prot} */') +def _render_user_ntf_entry(ri, op): + ri.cw.block_start(line=f"[{op.enum_name}] = ") + ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),") + ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,") + ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,") + ri.cw.p(f".free\t\t= (void *){op_prefix(ri, 'notify')}_free,") + ri.cw.block_end(line=',') + + +def render_user_family(family, cw, prototype): + symbol = f'const struct ynl_family ynl_{family.c_name}_family' + if prototype: + cw.p(f'extern {symbol};') + return + + if family.ntfs: + cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ") + for ntf_op_name, ntf_op in family.ntfs.items(): + if 'notify' in ntf_op: + op = family.ops[ntf_op['notify']] + ri = RenderInfo(cw, family, "user", op, "notify") + elif 'event' in ntf_op: + ri = RenderInfo(cw, family, "user", ntf_op, "event") + else: + raise Exception('Invalid notification ' + ntf_op_name) + _render_user_ntf_entry(ri, ntf_op) + for op_name, op in family.ops.items(): + if 'event' not in op: + continue + ri = RenderInfo(cw, family, "user", op, "event") + _render_user_ntf_entry(ri, op) + cw.block_end(line=";") + cw.nl() + + cw.block_start(f'{symbol} = ') + cw.p(f'.name\t\t= "{family.name}",') + if family.ntfs: + cw.p(f".ntf_info\t= {family['name']}_ntf_info,") + cw.p(f".ntf_info_size\t= MNL_ARRAY_SIZE({family['name']}_ntf_info),") + cw.block_end(line=';') + + def find_kernel_root(full_path): sub_path = '' while True: @@ -2071,6 +2260,13 @@ def main(): os.sys.exit(1) return + supported_models = ['unified'] + if args.mode == 'user': + supported_models += ['directional'] + if parsed.msg_id_model not in supported_models: + print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') + os.sys.exit(1) + cw = CodeWriter(BaseNlLib(), out_file) _, spec_kernel = find_kernel_root(args.spec) @@ -2101,7 +2297,16 @@ def main(): if args.out_file: cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"') cw.nl() - headers = [parsed.uapi_header] + headers = ['uapi/' + parsed.uapi_header] + else: + cw.p('#include <stdlib.h>') + cw.p('#include <string.h>') + if args.header: + cw.p('#include <linux/types.h>') + else: + cw.p(f'#include "{parsed.name}-user.h"') + cw.p('#include "ynl.h"') + headers = [parsed.uapi_header] for definition in parsed['definitions']: if 'header' in definition: headers.append(definition['header']) @@ -2111,9 +2316,6 @@ def main(): if args.mode == "user": if not args.header: - cw.p("#include <stdlib.h>") - cw.p("#include <stdio.h>") - cw.p("#include <string.h>") cw.p("#include <libmnl/libmnl.h>") cw.p("#include <linux/genetlink.h>") cw.nl() @@ -2121,6 +2323,8 @@ def main(): cw.p(f'#include "{one}"') else: cw.p('struct ynl_sock;') + cw.nl() + render_user_family(parsed, cw, True) cw.nl() if args.mode == "kernel": @@ -2144,7 +2348,7 @@ def main(): if parsed.kernel_policy in {'per-op', 'split'}: for op_name, op in parsed.ops.items(): if 'do' in op and 'event' not in op: - ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + ri = RenderInfo(cw, parsed, args.mode, op, "do") print_req_policy_fwd(cw, ri.struct['request'], ri=ri) cw.nl() @@ -2173,7 +2377,7 @@ def main(): for op_mode in ['do', 'dump']: if op_mode in op and 'request' in op[op_mode]: cw.p(f"/* {op.enum_name} - {op_mode} */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, op_mode) + ri = RenderInfo(cw, parsed, args.mode, op, op_mode) print_req_policy(cw, ri.struct['request'], ri=ri) cw.nl() @@ -2182,11 +2386,18 @@ def main(): print_kernel_family_struct_src(parsed, cw) if args.mode == "user": - has_ntf = False if args.header: + cw.p('/* Enums */') + put_op_name_fwd(parsed, cw) + + for name, const in parsed.consts.items(): + if isinstance(const, EnumSet): + put_enum_to_str_fwd(parsed, cw, const) + cw.nl() + cw.p('/* Common nested types */') - for attr_set, struct in sorted(parsed.pure_nested_structs.items()): - ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set) + for attr_set, struct in parsed.pure_nested_structs.items(): + ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) for op_name, op in parsed.ops.items(): @@ -2194,7 +2405,7 @@ def main(): if 'do' in op and 'event' not in op: cw.p(f"/* {op.enum_name} - do */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + ri = RenderInfo(cw, parsed, args.mode, op, "do") print_req_type(ri) print_req_type_helpers(ri) cw.nl() @@ -2206,7 +2417,7 @@ def main(): if 'dump' in op: cw.p(f"/* {op.enum_name} - dump */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'dump') + ri = RenderInfo(cw, parsed, args.mode, op, 'dump') if 'request' in op['dump']: print_req_type(ri) print_req_type_helpers(ri) @@ -2216,39 +2427,41 @@ def main(): print_dump_prototype(ri) cw.nl() - if 'notify' in op: + if op.has_ntf: cw.p(f"/* {op.enum_name} - notify */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify') - has_ntf = True + ri = RenderInfo(cw, parsed, args.mode, op, 'notify') if not ri.type_consistent: - raise Exception('Only notifications with consistent types supported') + raise Exception(f'Only notifications with consistent types supported ({op.name})') print_wrapped_type(ri) + for op_name, op in parsed.ntfs.items(): if 'event' in op: - ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'event') + ri = RenderInfo(cw, parsed, args.mode, op, 'event') cw.p(f"/* {op.enum_name} - event */") print_rsp_type(ri) cw.nl() print_wrapped_type(ri) - - if has_ntf: - cw.p('/* --------------- Common notification parsing --------------- */') - print_ntf_parse_prototype(parsed, cw) cw.nl() else: - cw.p('/* Policies */') - for name, _ in parsed.attr_sets.items(): - struct = Struct(parsed, name) - put_typol_fwd(cw, struct) + cw.p('/* Enums */') + put_op_name(parsed, cw) + + for name, const in parsed.consts.items(): + if isinstance(const, EnumSet): + put_enum_to_str(parsed, cw, const) cw.nl() - for name, _ in parsed.attr_sets.items(): + cw.p('/* Policies */') + for name in parsed.pure_nested_structs: + struct = Struct(parsed, name) + put_typol(cw, struct) + for name in parsed.root_sets: struct = Struct(parsed, name) put_typol(cw, struct) cw.p('/* Common nested types */') - for attr_set, struct in sorted(parsed.pure_nested_structs.items()): - ri = RenderInfo(cw, parsed, args.mode, "", "", "", attr_set) + for attr_set, struct in parsed.pure_nested_structs.items(): + ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) free_rsp_nested(ri, struct) if struct.request: @@ -2260,7 +2473,8 @@ def main(): cw.p(f"/* ============== {op.enum_name} ============== */") if 'do' in op and 'event' not in op: cw.p(f"/* {op.enum_name} - do */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + ri = RenderInfo(cw, parsed, args.mode, op, "do") + print_req_free(ri) print_rsp_free(ri) parse_rsp_msg(ri) print_req(ri) @@ -2268,34 +2482,31 @@ def main(): if 'dump' in op: cw.p(f"/* {op.enum_name} - dump */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, "dump") + ri = RenderInfo(cw, parsed, args.mode, op, "dump") if not ri.type_consistent: parse_rsp_msg(ri, deref=True) print_dump_type_free(ri) print_dump(ri) cw.nl() - if 'notify' in op: + if op.has_ntf: cw.p(f"/* {op.enum_name} - notify */") - ri = RenderInfo(cw, parsed, args.mode, op, op_name, 'notify') - has_ntf = True + ri = RenderInfo(cw, parsed, args.mode, op, 'notify') if not ri.type_consistent: - raise Exception('Only notifications with consistent types supported') + raise Exception(f'Only notifications with consistent types supported ({op.name})') print_ntf_type_free(ri) + for op_name, op in parsed.ntfs.items(): if 'event' in op: cw.p(f"/* {op.enum_name} - event */") - has_ntf = True - ri = RenderInfo(cw, parsed, args.mode, op, op_name, "do") + ri = RenderInfo(cw, parsed, args.mode, op, "do") parse_rsp_msg(ri) - ri = RenderInfo(cw, parsed, args.mode, op, op_name, "event") + ri = RenderInfo(cw, parsed, args.mode, op, "event") print_ntf_type_free(ri) - - if has_ntf: - cw.p('/* --------------- Common notification parsing --------------- */') - print_ntf_type_parse(parsed, cw, args.mode) + cw.nl() + render_user_family(parsed, cw, False) if args.header: cw.p(f'#endif /* {hdr_prot} */') diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh index 74f5de1c2399..2a4525e2aa17 100755 --- a/tools/net/ynl/ynl-regen.sh +++ b/tools/net/ynl/ynl-regen.sh @@ -14,7 +14,7 @@ done KDIR=$(dirname $(dirname $(dirname $(dirname $(realpath $0))))) -files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\)') +files=$(git grep --files-with-matches '^/\* YNL-GEN \(kernel\|uapi\|user\)') for f in $files; do # params: 0 1 2 3 # $YAML YNL-GEN kernel $mode diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4884520f954f..a794d9eca93d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -216,6 +216,12 @@ ifeq ($(call get-executable,$(BISON)),) dummy := $(error Error: $(BISON) is missing on this system, please install it) endif +ifeq ($(BUILD_BPF_SKEL),1) + ifeq ($(call get-executable,$(CLANG)),) + dummy := $(error $(CLANG) is missing on this system, please install it to be able to build with BUILD_BPF_SKEL=1) + endif +endif + ifneq ($(OUTPUT),) ifeq ($(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 371), 1) BISON_FILE_PREFIX_MAP := --file-prefix-map=$(OUTPUT)= @@ -921,6 +927,7 @@ ifndef NO_DEMANGLE EXTLIBS += -lstdc++ CFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT CXXFLAGS += -DHAVE_CXA_DEMANGLE_SUPPORT + $(call detected,CONFIG_CXX_DEMANGLE) endif ifdef BUILD_NONDISTRO ifeq ($(filter -liberty,$(EXTLIBS)),) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a42a6a99c2bc..f48794816d82 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -181,7 +181,6 @@ HOSTCC ?= gcc HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -LLVM_STRIP ?= llvm-strip PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -1057,15 +1056,33 @@ $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_ ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool -BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) +BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) +TOOLS_UAPI_INCLUDE := -I$(srctree)/tools/include/uapi $(BPFTOOL): | $(SKEL_TMP_OUT) $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) - $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \ - -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) $(TOOLS_UAPI_INCLUDE) \ + -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) $(QUIET_GENSKEL)$(BPFTOOL) gen skeleton $< > $@ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 77cb03e6ff87..9ca040bfb1aa 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -78,9 +78,9 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr, char path[PATH_MAX]; int err; u32 val; - u64 contextid = - evsel->core.attr.config & - (perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1") | + u64 contextid = evsel->core.attr.config & + (perf_pmu__format_bits(&cs_etm_pmu->format, "contextid") | + perf_pmu__format_bits(&cs_etm_pmu->format, "contextid1") | perf_pmu__format_bits(&cs_etm_pmu->format, "contextid2")); if (!contextid) @@ -114,8 +114,7 @@ static int cs_etm_validate_context_id(struct auxtrace_record *itr, * 0b00100 Maximum of 32-bit Context ID size. * All other values are reserved. */ - val = BMVAL(val, 5, 9); - if (!val || val != 0x4) { + if (BMVAL(val, 5, 9) != 0x4) { pr_err("%s: CONTEXTIDR_EL1 isn't supported, disable with %s/contextid1=0/\n", CORESIGHT_ETM_PMU_NAME, CORESIGHT_ETM_PMU_NAME); return -EINVAL; diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index 860a8b42b4b5..a9623b128ece 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -12,7 +12,7 @@ #include "arm-spe.h" #include "hisi-ptt.h" #include "../../../util/pmu.h" -#include "../cs-etm.h" +#include "../../../util/cs-etm.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index d730666ab95d..80b9f6287fe2 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -29,8 +29,8 @@ static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) char path[PATH_MAX]; FILE *file; - scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, - sysfs, cpus->map[cpu]); + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, + sysfs, RC_CHK_ACCESS(cpus)->map[cpu].cpu); file = fopen(path, "r"); if (!file) { diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index fa143acb4c8d..ef1ed645097c 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -18,7 +18,7 @@ static struct perf_pmu *pmu__find_core_pmu(void) * The cpumap should cover all CPUs. Otherwise, some CPUs may * not support some events or have different event IDs. */ - if (pmu->cpus->nr != cpu__max_cpu().cpu) + if (RC_CHK_ACCESS(pmu->cpus)->nr != cpu__max_cpu().cpu) return NULL; return pmu; diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 799147658dee..b68f47541169 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -449,7 +449,7 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self -# 447 reserved for memfd_secret +447 common memfd_secret sys_memfd_secret sys_memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index 50ae8bd58296..6188e19d3129 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -7,7 +7,3 @@ MEMCPY_FN(memcpy_orig, MEMCPY_FN(__memcpy, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") - -MEMCPY_FN(memcpy_erms, - "x86-64-movsb", - "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 6eb45a2aa8db..1b9fef7efcdc 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -2,7 +2,7 @@ /* Various wrappers to make the kernel .S file build in user-space: */ -// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it +// memcpy_orig is being defined as SYM_L_LOCAL but we need it #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memcpy MEMCPY /* don't hide glibc's memcpy() */ diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index dac6d2b7c39b..247c72fdfb9d 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -7,7 +7,3 @@ MEMSET_FN(memset_orig, MEMSET_FN(__memset, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") - -MEMSET_FN(memset_erms, - "x86-64-stosb", - "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index 6f093c483842..abd26c95f1aa 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it +// memset_orig is being defined as SYM_L_LOCAL but we need it #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memset MEMSET /* don't hide glibc's memset() */ diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 810e3376c7d6..f9906f52e4fa 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -1175,7 +1175,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, "Use BPF to measure function latency"), #endif - OPT_BOOLEAN('n', "--use-nsec", &ftrace.use_nsec, + OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec, "Use nano-second histogram"), OPT_PARENT(common_options), }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 006f522d0e7f..c57be48d65bb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3647,6 +3647,13 @@ static int process_stat_config_event(struct perf_session *session __maybe_unused union perf_event *event) { perf_event__read_stat_config(&stat_config, &event->stat_config); + + /* + * Aggregation modes are not used since post-processing scripts are + * supposed to take care of such requirements + */ + stat_config.aggr_mode = AGGR_NONE; + return 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cc9fa48d636f..b9ad32f21e57 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -667,6 +667,13 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) evsel_list->core.threads->err_thread = -1; return COUNTER_RETRY; } + } else if (counter->skippable) { + if (verbose > 0) + ui__warning("skipping event %s that kernel failed to open .\n", + evsel__name(counter)); + counter->supported = false; + counter->errored = true; + return COUNTER_SKIP; } evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); @@ -1890,15 +1897,28 @@ static int add_default_attributes(void) * caused by exposing latent bugs. This is fixed properly in: * https://lore.kernel.org/lkml/bff481ba-e60a-763f-0aa0-3ee53302c480@linux.intel.com/ */ - if (metricgroup__has_metric("TopdownL1") && !perf_pmu__has_hybrid() && - metricgroup__parse_groups(evsel_list, "TopdownL1", - /*metric_no_group=*/false, - /*metric_no_merge=*/false, - /*metric_no_threshold=*/true, - stat_config.user_requested_cpu_list, - stat_config.system_wide, - &stat_config.metric_events) < 0) - return -1; + if (metricgroup__has_metric("TopdownL1") && !perf_pmu__has_hybrid()) { + struct evlist *metric_evlist = evlist__new(); + struct evsel *metric_evsel; + + if (!metric_evlist) + return -1; + + if (metricgroup__parse_groups(metric_evlist, "TopdownL1", + /*metric_no_group=*/false, + /*metric_no_merge=*/false, + /*metric_no_threshold=*/true, + stat_config.user_requested_cpu_list, + stat_config.system_wide, + &stat_config.metric_events) < 0) + return -1; + + evlist__for_each_entry(metric_evlist, metric_evsel) { + metric_evsel->skippable = true; + } + evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries); + evlist__delete(metric_evlist); + } /* Platform specific attrs */ if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0) diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json index 75d80e70e5cd..1f9047553942 100644 --- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json @@ -133,6 +133,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", "ScaleUnit": "100%", "Unit": "cpu_atom" @@ -143,6 +144,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound_aux", "MetricThreshold": "tma_backend_bound_aux > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.", "ScaleUnit": "100%", "Unit": "cpu_atom" @@ -153,6 +155,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", "ScaleUnit": "100%", "Unit": "cpu_atom" @@ -163,6 +166,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_base", "MetricThreshold": "tma_base > 0.6", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -182,6 +186,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.05", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -209,6 +214,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -255,6 +261,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -264,6 +271,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.15", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -291,6 +299,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -593,6 +602,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.05", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -611,6 +621,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -629,6 +640,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_ms_uops", "MetricThreshold": "tma_ms_uops > 0.05", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", "ScaleUnit": "100%", "Unit": "cpu_atom" @@ -729,6 +741,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_aux_group", "MetricName": "tma_resource_bound", "MetricThreshold": "tma_resource_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count.", "ScaleUnit": "100%", "Unit": "cpu_atom" @@ -739,6 +752,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.75", + "MetricgroupNoGroup": "TopdownL1", "ScaleUnit": "100%", "Unit": "cpu_atom" }, @@ -848,6 +862,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -858,6 +873,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -868,6 +884,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -919,6 +936,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -1031,6 +1049,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -1041,6 +1060,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -1121,6 +1141,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -1141,6 +1162,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -2023,6 +2045,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -2082,6 +2105,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -2112,6 +2136,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%", "Unit": "cpu_core" @@ -2310,6 +2335,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", "ScaleUnit": "100%", "Unit": "cpu_core" diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json index 1a85d935c733..0402adbf7d92 100644 --- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json +++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -98,6 +98,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", "ScaleUnit": "100%" }, @@ -107,6 +108,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound_aux", "MetricThreshold": "tma_backend_bound_aux > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.", "ScaleUnit": "100%" }, @@ -116,6 +118,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", "ScaleUnit": "100%" }, @@ -125,6 +128,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_base", "MetricThreshold": "tma_base > 0.6", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -142,6 +146,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.05", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -166,6 +171,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -207,6 +213,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -215,6 +222,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.15", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -239,6 +247,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "ScaleUnit": "100%" }, { @@ -499,6 +508,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.05", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -515,6 +525,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "ScaleUnit": "100%" }, { @@ -531,6 +542,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_ms_uops", "MetricThreshold": "tma_ms_uops > 0.05", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", "ScaleUnit": "100%" }, @@ -620,6 +632,7 @@ "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_aux_group", "MetricName": "tma_resource_bound", "MetricThreshold": "tma_resource_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count.", "ScaleUnit": "100%" }, @@ -629,6 +642,7 @@ "MetricGroup": "TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.75", + "MetricgroupNoGroup": "TopdownL1", "ScaleUnit": "100%" }, { diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json index 51cf8560a8d3..f9e2316601e1 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -103,6 +103,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -112,6 +113,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -122,6 +124,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -170,6 +173,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -263,6 +267,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -272,6 +277,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -326,6 +332,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -335,6 +342,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -828,6 +836,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -858,6 +867,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -886,6 +896,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1048,6 +1059,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json index fb57c7382408..e9c46d336a8e 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json @@ -97,6 +97,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", "ScaleUnit": "100%" }, @@ -106,6 +107,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -116,6 +118,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -164,6 +167,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -248,6 +252,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -257,6 +262,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -311,6 +317,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -320,6 +327,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -795,6 +803,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -825,6 +834,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -853,6 +863,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1013,6 +1024,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json index 65ec0c9e55d1..437b9867acb9 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -103,6 +103,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -112,6 +113,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -122,6 +124,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -170,6 +173,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -263,6 +267,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -272,6 +277,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -326,6 +332,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -335,6 +342,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -829,6 +837,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -869,6 +878,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -897,6 +907,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1079,6 +1090,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 8f7dc72accd0..875c766222e3 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -101,6 +101,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -110,6 +111,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -120,6 +122,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -167,6 +170,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -271,6 +275,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -280,6 +285,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -354,6 +360,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -372,6 +379,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -1142,6 +1150,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1196,6 +1205,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1224,6 +1234,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1458,6 +1469,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json index 2528418200bb..9570a88d6d1c 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -103,6 +103,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -112,6 +113,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -122,6 +124,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -161,6 +164,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -254,6 +258,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -263,6 +268,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -272,6 +278,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -281,6 +288,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -663,6 +671,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -693,6 +702,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -721,6 +731,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -874,6 +885,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json index 11f152c346eb..a522202cf684 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -103,6 +103,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -112,6 +113,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -122,6 +124,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -161,6 +164,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -254,6 +258,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -263,6 +268,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -272,6 +278,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -281,6 +288,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -664,6 +672,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -704,6 +713,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -732,6 +742,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -905,6 +916,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json index f45ae3483df4..1a2154f28b7b 100644 --- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json @@ -115,6 +115,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", "ScaleUnit": "100%" }, @@ -124,6 +125,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -141,6 +143,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -187,6 +190,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -288,6 +292,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -297,6 +302,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -369,6 +375,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -378,6 +385,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -1111,6 +1119,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1164,6 +1173,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1191,6 +1201,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1360,6 +1371,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 0f9b174dfc22..1ef772b40e04 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -80,6 +80,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", "ScaleUnit": "100%" }, @@ -89,6 +90,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -106,6 +108,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -152,6 +155,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -253,6 +257,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -262,6 +267,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -334,6 +340,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -343,6 +350,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -1134,6 +1142,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1187,6 +1196,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1214,6 +1224,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1410,6 +1421,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json index 5247f69c13b6..11080ccffd51 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -103,6 +103,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -112,6 +113,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -122,6 +124,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -161,6 +164,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -254,6 +258,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -263,6 +268,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -299,6 +305,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -308,6 +315,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -724,6 +732,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -754,6 +763,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -782,6 +792,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -917,6 +928,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json index 89469b10fa30..65a46d659c0a 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -103,6 +103,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -112,6 +113,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -122,6 +124,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -161,6 +164,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -254,6 +258,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -263,6 +268,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -299,6 +305,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -308,6 +315,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -725,6 +733,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -765,6 +774,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -793,6 +803,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -948,6 +959,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json index e8f4e5c01c9f..66a6f657bd6f 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -76,6 +76,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -85,6 +86,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -95,6 +97,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -114,6 +117,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -160,6 +164,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_lcp", "ScaleUnit": "100%" }, @@ -169,6 +174,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -205,6 +211,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -214,6 +221,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -412,6 +420,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -422,6 +431,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -450,6 +460,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -487,6 +498,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json index 4a99fe515f4b..4b8bc19392a4 100644 --- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -76,6 +76,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -85,6 +86,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -95,6 +97,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -114,6 +117,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -160,6 +164,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_lcp", "ScaleUnit": "100%" }, @@ -169,6 +174,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: RS_EVENTS.EMPTY_END", "ScaleUnit": "100%" }, @@ -205,6 +211,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound.", "ScaleUnit": "100%" }, @@ -214,6 +221,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -411,6 +419,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -421,6 +430,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -449,6 +459,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -486,6 +497,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json index 126300b7ae77..620fc5bd2217 100644 --- a/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json +++ b/tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json @@ -87,6 +87,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", "ScaleUnit": "100%" }, @@ -96,6 +97,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -105,6 +107,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: TOPDOWN.BR_MISPREDICT_SLOTS. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -151,6 +154,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -252,6 +256,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 6 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -261,6 +266,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -351,6 +357,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -369,6 +376,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences. Sample with: UOPS_RETIRED.HEAVY", "ScaleUnit": "100%" }, @@ -1216,6 +1224,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1269,6 +1278,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1304,6 +1314,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1509,6 +1520,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index a6d212b349f5..21ef6c9be816 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -101,6 +101,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -110,6 +111,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -120,6 +122,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -167,6 +170,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -271,6 +275,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -280,6 +285,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -345,6 +351,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -363,6 +370,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -1065,6 +1073,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1110,6 +1119,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1138,6 +1148,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1343,6 +1354,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index fa2f7f126a30..eb6f12c0343d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -101,6 +101,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound.", "ScaleUnit": "100%" }, @@ -110,6 +111,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -120,6 +122,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -167,6 +170,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -271,6 +275,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 4 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -280,6 +285,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -354,6 +360,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -372,6 +379,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -1123,6 +1131,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1177,6 +1186,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1205,6 +1215,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1429,6 +1440,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.RETIRE_SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json index 4c80d6be6cf1..b442ed4acfbb 100644 --- a/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json @@ -109,6 +109,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_backend_bound", "MetricThreshold": "tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. Sample with: TOPDOWN.BACKEND_BOUND_SLOTS", "ScaleUnit": "100%" }, @@ -118,6 +119,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_bad_speculation", "MetricThreshold": "tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example.", "ScaleUnit": "100%" }, @@ -135,6 +137,7 @@ "MetricGroup": "BadSpec;BrMispredicts;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM", "MetricName": "tma_branch_mispredicts", "MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction. These slots are either wasted by uops fetched from an incorrectly speculated program path; or stalls when the out-of-order part of the machine needs to recover its state from a speculative path. Sample with: BR_MISP_RETIRED.ALL_BRANCHES. Related metrics: tma_info_branch_misprediction_cost, tma_info_mispredictions, tma_mispredicts_resteers", "ScaleUnit": "100%" }, @@ -181,6 +184,7 @@ "MetricGroup": "Backend;Compute;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_core_bound", "MetricThreshold": "tma_core_bound > 0.1 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where Core non-memory issues were of a bottleneck. Shortage in hardware compute resources; or dependencies in software's instructions are both categorized under Core Bound. Hence it may indicate the machine ran out of an out-of-order resource; certain execution units are overloaded or dependencies in program's data- or instruction-flow are limiting the performance (e.g. FP-chained long-latency arithmetic operations).", "ScaleUnit": "100%" }, @@ -282,6 +286,7 @@ "MetricGroup": "FetchBW;Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group;tma_issueFB", "MetricName": "tma_fetch_bandwidth", "MetricThreshold": "tma_fetch_bandwidth > 0.1 & tma_frontend_bound > 0.15 & tma_info_ipc / 5 > 0.35", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend bandwidth issues. For example; inefficiencies at the instruction decoders; or restrictions for caching in the DSB (decoded uops cache) are categorized under Fetch Bandwidth. In such cases; the Frontend typically delivers suboptimal amount of uops to the Backend. Sample with: FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_1_PS;FRONTEND_RETIRED.LATENCY_GE_2_PS. Related metrics: tma_dsb_switches, tma_info_dsb_coverage, tma_info_dsb_misses, tma_info_iptb, tma_lcp", "ScaleUnit": "100%" }, @@ -291,6 +296,7 @@ "MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group", "MetricName": "tma_fetch_latency", "MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues. For example; instruction-cache misses; iTLB misses or fetch stalls after a branch misprediction are categorized under Frontend Latency. In such cases; the Frontend eventually delivers no uops for some period. Sample with: FRONTEND_RETIRED.LATENCY_GE_16_PS;FRONTEND_RETIRED.LATENCY_GE_8_PS", "ScaleUnit": "100%" }, @@ -363,6 +369,7 @@ "MetricGroup": "PGO;TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_frontend_bound", "MetricThreshold": "tma_frontend_bound > 0.15", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Pipeline_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. Sample with: FRONTEND_RETIRED.LATENCY_GE_4_PS", "ScaleUnit": "100%" }, @@ -372,6 +379,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_heavy_operations", "MetricThreshold": "tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences. This highly-correlates with the uop length of these instructions/sequences.", "ScaleUnit": "100%" }, @@ -1125,6 +1133,7 @@ "MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group", "MetricName": "tma_light_operations", "MetricThreshold": "tma_light_operations > 0.6", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots where the CPU was retiring light-weight operations -- instructions that require no more than one uop (micro-operation). This correlates with total number of instructions used by the program. A uops-per-instruction (see UopPI metric) ratio of 1 or less should be expected for decently optimized software running on Intel Core/Xeon products. While this often indicates efficient X86 instructions were executed; high value does not necessarily mean better performance cannot be achieved. Sample with: INST_RETIRED.PREC_DIST", "ScaleUnit": "100%" }, @@ -1178,6 +1187,7 @@ "MetricGroup": "BadSpec;MachineClears;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueMC;tma_issueSyncxn", "MetricName": "tma_machine_clears", "MetricThreshold": "tma_machine_clears > 0.1 & tma_bad_speculation > 0.15", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the CPU has wasted due to Machine Clears. These slots are either wasted by uops fetched prior to the clear; or stalls the out-of-order portion of the machine needs to recover its state after the clear. For example; this can happen due to memory ordering Nukes (e.g. Memory Disambiguation) or Self-Modifying-Code (SMC) nukes. Sample with: MACHINE_CLEARS.COUNT. Related metrics: tma_clears_resteers, tma_contested_accesses, tma_data_sharing, tma_false_sharing, tma_l1_bound, tma_microcode_sequencer, tma_ms_switches, tma_remote_cache", "ScaleUnit": "100%" }, @@ -1205,6 +1215,7 @@ "MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group", "MetricName": "tma_memory_bound", "MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2", + "MetricgroupNoGroup": "TopdownL2", "PublicDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck. Memory Bound estimates fraction of slots where pipeline is likely stalled due to demand load or store instructions. This accounts mainly for (1) non-completed in-flight memory demand loads which coincides with execution units starvation; in addition to (2) cases where stores could impose backpressure on the pipeline when many of them get buffered at the same time (less common out of the two).", "ScaleUnit": "100%" }, @@ -1374,6 +1385,7 @@ "MetricGroup": "TmaL1;TopdownL1;tma_L1_group", "MetricName": "tma_retiring", "MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1", + "MetricgroupNoGroup": "TopdownL1", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. Sample with: UOPS_RETIRED.SLOTS", "ScaleUnit": "100%" }, diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index ca99b9cfe4ad..f57a8f274025 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -52,7 +52,8 @@ _json_event_attributes = [ # Attributes that are in pmu_metric rather than pmu_event. _json_metric_attributes = [ 'metric_name', 'metric_group', 'metric_expr', 'metric_threshold', 'desc', - 'long_desc', 'unit', 'compat', 'aggr_mode', 'event_grouping' + 'long_desc', 'unit', 'compat', 'metricgroup_no_group', 'aggr_mode', + 'event_grouping' ] # Attributes that are bools or enum int values, encoded as '0', '1',... _json_enum_attributes = ['aggr_mode', 'deprecated', 'event_grouping', 'perpkg'] @@ -303,6 +304,7 @@ class JsonEvent: self.deprecated = jd.get('Deprecated') self.metric_name = jd.get('MetricName') self.metric_group = jd.get('MetricGroup') + self.metricgroup_no_group = jd.get('MetricgroupNoGroup') self.event_grouping = convert_metric_constraint(jd.get('MetricConstraint')) self.metric_expr = None if 'MetricExpr' in jd: diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index b7dff8f1021f..80349685cf4d 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -59,6 +59,7 @@ struct pmu_metric { const char *compat; const char *desc; const char *long_desc; + const char *metricgroup_no_group; enum aggr_mode_class aggr_mode; enum metric_event_groups event_grouping; }; diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index ccfef861e931..e890c261ad26 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -152,7 +152,7 @@ def parse_version(version): # - expected values assignments class Test(object): def __init__(self, path, options): - parser = configparser.SafeConfigParser() + parser = configparser.ConfigParser() parser.read(path) log.warning("running '%s'" % path) @@ -247,7 +247,7 @@ class Test(object): return True def load_events(self, path, events): - parser_event = configparser.SafeConfigParser() + parser_event = configparser.ConfigParser() parser_event.read(path) # The event record section header contains 'event' word, @@ -261,7 +261,7 @@ class Test(object): # Read parent event if there's any if (':' in section): base = section[section.index(':') + 1:] - parser_base = configparser.SafeConfigParser() + parser_base = configparser.ConfigParser() parser_base.read(self.test_dir + '/' + base) base_items = parser_base.items('event') diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index a21fb65bc012..fccd8ec4d1b0 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -16,7 +16,7 @@ pinned=0 exclusive=0 exclude_user=0 exclude_kernel=0|1 -exclude_hv=0 +exclude_hv=0|1 exclude_idle=0 mmap=0 comm=0 diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default index d8ea6a88163f..a1e2da0a9a6d 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/attr/test-stat-default @@ -40,7 +40,6 @@ fd=6 type=0 config=7 optional=1 - # PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND [event7:base-stat] fd=7 @@ -89,79 +88,98 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33024 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33280 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33536 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event16:base-stat] fd=16 -group_fd=11 type=4 -config=33792 -disabled=0 -enable_on_exec=0 -read_format=15 +config=4109 optional=1 -# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event17:base-stat] fd=17 -group_fd=11 type=4 -config=34048 -disabled=0 -enable_on_exec=0 -read_format=15 +config=17039629 optional=1 -# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event18:base-stat] fd=18 -group_fd=11 type=4 -config=34304 -disabled=0 -enable_on_exec=0 -read_format=15 +config=60 optional=1 -# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event19:base-stat] fd=19 -group_fd=11 type=4 -config=34560 -disabled=0 -enable_on_exec=0 -read_format=15 +config=2097421 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +[event20:base-stat] +fd=20 +type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event21:base-stat] +fd=21 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event22:base-stat] +fd=22 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event23:base-stat] +fd=23 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event24:base-stat] +fd=24 +type=4 +config=270 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1 index b656ab93c5bf..1c52cb05c900 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/attr/test-stat-detailed-1 @@ -90,89 +90,108 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33024 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33280 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33536 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event16:base-stat] fd=16 -group_fd=11 type=4 -config=33792 -disabled=0 -enable_on_exec=0 -read_format=15 +config=4109 optional=1 -# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event17:base-stat] fd=17 -group_fd=11 type=4 -config=34048 -disabled=0 -enable_on_exec=0 -read_format=15 +config=17039629 optional=1 -# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event18:base-stat] fd=18 -group_fd=11 type=4 -config=34304 -disabled=0 -enable_on_exec=0 -read_format=15 +config=60 optional=1 -# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event19:base-stat] fd=19 -group_fd=11 type=4 -config=34560 -disabled=0 -enable_on_exec=0 -read_format=15 +config=2097421 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +[event20:base-stat] +fd=20 +type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event21:base-stat] +fd=21 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event22:base-stat] +fd=22 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event23:base-stat] +fd=23 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event24:base-stat] +fd=24 +type=4 +config=270 optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event20:base-stat] -fd=20 +[event25:base-stat] +fd=25 type=3 config=0 optional=1 @@ -181,8 +200,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event21:base-stat] -fd=21 +[event26:base-stat] +fd=26 type=3 config=65536 optional=1 @@ -191,8 +210,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event22:base-stat] -fd=22 +[event27:base-stat] +fd=27 type=3 config=2 optional=1 @@ -201,8 +220,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event23:base-stat] -fd=23 +[event28:base-stat] +fd=28 type=3 config=65538 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2 index 97625090a1c4..7e961d24a885 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/attr/test-stat-detailed-2 @@ -90,89 +90,108 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33024 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33280 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33536 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event16:base-stat] fd=16 -group_fd=11 type=4 -config=33792 -disabled=0 -enable_on_exec=0 -read_format=15 +config=4109 optional=1 -# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event17:base-stat] fd=17 -group_fd=11 type=4 -config=34048 -disabled=0 -enable_on_exec=0 -read_format=15 +config=17039629 optional=1 -# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event18:base-stat] fd=18 -group_fd=11 type=4 -config=34304 -disabled=0 -enable_on_exec=0 -read_format=15 +config=60 optional=1 -# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event19:base-stat] fd=19 -group_fd=11 type=4 -config=34560 -disabled=0 -enable_on_exec=0 -read_format=15 +config=2097421 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +[event20:base-stat] +fd=20 +type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event21:base-stat] +fd=21 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event22:base-stat] +fd=22 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event23:base-stat] +fd=23 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event24:base-stat] +fd=24 +type=4 +config=270 optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event20:base-stat] -fd=20 +[event25:base-stat] +fd=25 type=3 config=0 optional=1 @@ -181,8 +200,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event21:base-stat] -fd=21 +[event26:base-stat] +fd=26 type=3 config=65536 optional=1 @@ -191,8 +210,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event22:base-stat] -fd=22 +[event27:base-stat] +fd=27 type=3 config=2 optional=1 @@ -201,8 +220,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event23:base-stat] -fd=23 +[event28:base-stat] +fd=28 type=3 config=65538 optional=1 @@ -211,8 +230,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event24:base-stat] -fd=24 +[event29:base-stat] +fd=29 type=3 config=1 optional=1 @@ -221,8 +240,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event25:base-stat] -fd=25 +[event30:base-stat] +fd=30 type=3 config=65537 optional=1 @@ -231,8 +250,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event26:base-stat] -fd=26 +[event31:base-stat] +fd=31 type=3 config=3 optional=1 @@ -241,8 +260,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event27:base-stat] -fd=27 +[event32:base-stat] +fd=32 type=3 config=65539 optional=1 @@ -251,8 +270,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event28:base-stat] -fd=28 +[event33:base-stat] +fd=33 type=3 config=4 optional=1 @@ -261,8 +280,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event29:base-stat] -fd=29 +[event34:base-stat] +fd=34 type=3 config=65540 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3 index d555042e3fbf..e50535f45977 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/attr/test-stat-detailed-3 @@ -90,89 +90,108 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33024 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33280 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33536 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event16:base-stat] fd=16 -group_fd=11 type=4 -config=33792 -disabled=0 -enable_on_exec=0 -read_format=15 +config=4109 optional=1 -# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event17:base-stat] fd=17 -group_fd=11 type=4 -config=34048 -disabled=0 -enable_on_exec=0 -read_format=15 +config=17039629 optional=1 -# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event18:base-stat] fd=18 -group_fd=11 type=4 -config=34304 -disabled=0 -enable_on_exec=0 -read_format=15 +config=60 optional=1 -# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event19:base-stat] fd=19 -group_fd=11 type=4 -config=34560 -disabled=0 -enable_on_exec=0 -read_format=15 +config=2097421 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +[event20:base-stat] +fd=20 +type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event21:base-stat] +fd=21 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event22:base-stat] +fd=22 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event23:base-stat] +fd=23 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event24:base-stat] +fd=24 +type=4 +config=270 optional=1 # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event20:base-stat] -fd=20 +[event25:base-stat] +fd=25 type=3 config=0 optional=1 @@ -181,8 +200,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event21:base-stat] -fd=21 +[event26:base-stat] +fd=26 type=3 config=65536 optional=1 @@ -191,8 +210,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event22:base-stat] -fd=22 +[event27:base-stat] +fd=27 type=3 config=2 optional=1 @@ -201,8 +220,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event23:base-stat] -fd=23 +[event28:base-stat] +fd=28 type=3 config=65538 optional=1 @@ -211,8 +230,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event24:base-stat] -fd=24 +[event29:base-stat] +fd=29 type=3 config=1 optional=1 @@ -221,8 +240,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event25:base-stat] -fd=25 +[event30:base-stat] +fd=30 type=3 config=65537 optional=1 @@ -231,8 +250,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event26:base-stat] -fd=26 +[event31:base-stat] +fd=31 type=3 config=3 optional=1 @@ -241,8 +260,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event27:base-stat] -fd=27 +[event32:base-stat] +fd=32 type=3 config=65539 optional=1 @@ -251,8 +270,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event28:base-stat] -fd=28 +[event33:base-stat] +fd=33 type=3 config=4 optional=1 @@ -261,8 +280,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event29:base-stat] -fd=29 +[event34:base-stat] +fd=34 type=3 config=65540 optional=1 @@ -271,8 +290,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event30:base-stat] -fd=30 +[event35:base-stat] +fd=35 type=3 config=512 optional=1 @@ -281,8 +300,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event31:base-stat] -fd=31 +[event36:base-stat] +fd=36 type=3 config=66048 optional=1 diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index cbf0e0c74906..733ead151c63 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -120,7 +120,8 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u p = "FOO/0"; ret = expr__parse(&val, ctx, p); - TEST_ASSERT_VAL("division by zero", ret == -1); + TEST_ASSERT_VAL("division by zero", ret == 0); + TEST_ASSERT_VAL("division by zero", isnan(val)); p = "BAR/"; ret = expr__parse(&val, ctx, p); diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 1185b79e6274..c05148ea400c 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -38,6 +38,7 @@ static void load_runtime_stat(struct evlist *evlist, struct value *vals) evlist__alloc_aggr_stats(evlist, 1); evlist__for_each_entry(evlist, evsel) { count = find_value(evsel->name, vals); + evsel->supported = true; evsel->stats->aggr->counts.val = count; if (evsel__name_is(evsel, "duration_time")) update_stats(&walltime_nsecs_stats, count); diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 2c1d3f704995..b154fbb15d54 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -28,6 +28,18 @@ test_stat_record_report() { echo "stat record and report test [Success]" } +test_stat_record_script() { + echo "stat record and script test" + if ! perf stat record -o - true | perf script -i - 2>&1 | \ + grep -E -q "CPU[[:space:]]+THREAD[[:space:]]+VAL[[:space:]]+ENA[[:space:]]+RUN[[:space:]]+TIME[[:space:]]+EVENT" + then + echo "stat record and script test [Failed]" + err=1 + return + fi + echo "stat record and script test [Success]" +} + test_stat_repeat_weak_groups() { echo "stat repeat weak groups test" if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \ @@ -93,6 +105,7 @@ test_topdown_weak_groups() { test_default_stat test_stat_record_report +test_stat_record_script test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh index 4ddb17cb83c5..3a8b9bffa022 100755 --- a/tools/perf/tests/shell/test_intel_pt.sh +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -506,6 +506,13 @@ test_sample() echo "perf record failed with --aux-sample" return 1 fi + # Check with event with PMU name + if perf_record_no_decode -o "${perfdatafile}" -e br_misp_retired.all_branches:u uname ; then + if ! perf_record_no_decode -o "${perfdatafile}" -e '{intel_pt//,br_misp_retired.all_branches/aux-sample-size=8192/}:u' uname ; then + echo "perf record failed with --aux-sample-size" + return 1 + fi + fi echo OK return 0 } diff --git a/tools/perf/tests/shell/test_java_symbol.sh b/tools/perf/tests/shell/test_java_symbol.sh index 90cea8811926..499539d1c479 100755 --- a/tools/perf/tests/shell/test_java_symbol.sh +++ b/tools/perf/tests/shell/test_java_symbol.sh @@ -56,7 +56,7 @@ if [ $? -ne 0 ]; then exit 1 fi -if ! perf inject -i $PERF_DATA -o $PERF_INJ_DATA -j; then +if ! DEBUGINFOD_URLS='' perf inject -i $PERF_DATA -o $PERF_INJ_DATA -j; then echo "Fail to inject samples" exit 1 fi diff --git a/tools/perf/trace/beauty/arch_prctl.c b/tools/perf/trace/beauty/arch_prctl.c index fe022ca67e60..a211348d3204 100644 --- a/tools/perf/trace/beauty/arch_prctl.c +++ b/tools/perf/trace/beauty/arch_prctl.c @@ -12,10 +12,12 @@ static DEFINE_STRARRAY_OFFSET(x86_arch_prctl_codes_1, "ARCH_", x86_arch_prctl_codes_1_offset); static DEFINE_STRARRAY_OFFSET(x86_arch_prctl_codes_2, "ARCH_", x86_arch_prctl_codes_2_offset); +static DEFINE_STRARRAY_OFFSET(x86_arch_prctl_codes_3, "ARCH_", x86_arch_prctl_codes_3_offset); static struct strarray *x86_arch_prctl_codes[] = { &strarray__x86_arch_prctl_codes_1, &strarray__x86_arch_prctl_codes_2, + &strarray__x86_arch_prctl_codes_3, }; static DEFINE_STRARRAYS(x86_arch_prctl_codes); diff --git a/tools/perf/trace/beauty/x86_arch_prctl.sh b/tools/perf/trace/beauty/x86_arch_prctl.sh index 57fa6aaffe70..fd5c740512c5 100755 --- a/tools/perf/trace/beauty/x86_arch_prctl.sh +++ b/tools/perf/trace/beauty/x86_arch_prctl.sh @@ -24,3 +24,4 @@ print_range () { print_range 1 0x1 0x1001 print_range 2 0x2 0x2001 +print_range 3 0x4 0x4001 diff --git a/tools/perf/util/Build b/tools/perf/util/Build index bd18fe5f2719..f9df1df1eec0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -214,7 +214,7 @@ perf-$(CONFIG_ZSTD) += zstd.o perf-$(CONFIG_LIBCAP) += cap.o -perf-y += demangle-cxx.o +perf-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o perf-y += demangle-ocaml.o perf-y += demangle-java.o perf-y += demangle-rust.o diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 8d3cfbb3cc65..1d48226ae75d 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -416,6 +416,8 @@ int contention_end(u64 *ctx) return 0; } +struct rq {}; + extern struct rq runqueues __ksym; struct rq___old { diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c index cffe493af1ed..fb94f5280626 100644 --- a/tools/perf/util/bpf_skel/sample_filter.bpf.c +++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c @@ -25,7 +25,7 @@ struct perf_sample_data___new { } __attribute__((preserve_access_index)); /* new kernel perf_mem_data_src definition */ -union perf_mem_data_src__new { +union perf_mem_data_src___new { __u64 val; struct { __u64 mem_op:5, /* type of opcode */ @@ -108,7 +108,7 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 7) return kctx->data->data_src.mem_blk; if (entry->part == 8) { - union perf_mem_data_src__new *data = (void *)&kctx->data->data_src; + union perf_mem_data_src___new *data = (void *)&kctx->data->data_src; if (bpf_core_field_exists(data->mem_hops)) return data->mem_hops; diff --git a/tools/perf/util/bpf_skel/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux.h index 449b1ea91fc4..c7ed51b0c1ef 100644 --- a/tools/perf/util/bpf_skel/vmlinux.h +++ b/tools/perf/util/bpf_skel/vmlinux.h @@ -1,6 +1,7 @@ #ifndef __VMLINUX_H #define __VMLINUX_H +#include <linux/stddef.h> // for define __always_inline #include <linux/bpf.h> #include <linux/types.h> #include <linux/perf_event.h> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 70cac0375b34..ecca40787ac9 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -227,6 +227,19 @@ struct cs_etm_packet_queue { #define INFO_HEADER_SIZE (sizeof(((struct perf_record_auxtrace_info *)0)->type) + \ sizeof(((struct perf_record_auxtrace_info *)0)->reserved__)) +/* CoreSight trace ID is currently the bottom 7 bits of the value */ +#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) + +/* + * perf record will set the legacy meta data values as unused initially. + * This allows perf report to manage the decoders created when dynamic + * allocation in operation. + */ +#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) + +/* Value to set for unused trace ID values */ +#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 356c07f03be6..c2dbb5647e75 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -282,6 +282,7 @@ void evsel__init(struct evsel *evsel, evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->config_terms); INIT_LIST_HEAD(&evsel->bpf_counter_list); + INIT_LIST_HEAD(&evsel->bpf_filters); perf_evsel__object.init(evsel); evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); @@ -290,6 +291,7 @@ void evsel__init(struct evsel *evsel, evsel->per_pkg_mask = NULL; evsel->collect_stat = false; evsel->pmu_name = NULL; + evsel->skippable = false; } struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -828,26 +830,26 @@ bool evsel__name_is(struct evsel *evsel, const char *name) const char *evsel__group_pmu_name(const struct evsel *evsel) { - const struct evsel *leader; + struct evsel *leader = evsel__leader(evsel); + struct evsel *pos; - /* If the pmu_name is set use it. pmu_name isn't set for CPU and software events. */ - if (evsel->pmu_name) - return evsel->pmu_name; /* * Software events may be in a group with other uncore PMU events. Use - * the pmu_name of the group leader to avoid breaking the software event - * out of the group. + * the pmu_name of the first non-software event to avoid breaking the + * software event out of the group. * * Aux event leaders, like intel_pt, expect a group with events from * other PMUs, so substitute the AUX event's PMU in this case. */ - leader = evsel__leader(evsel); - if ((evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) && - leader->pmu_name) { - return leader->pmu_name; + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE || evsel__is_aux_event(leader)) { + /* Starting with the leader, find the first event with a named PMU. */ + for_each_group_evsel(pos, leader) { + if (pos->pmu_name) + return pos->pmu_name; + } } - return "cpu"; + return evsel->pmu_name ?: "cpu"; } const char *evsel__metric_id(const struct evsel *evsel) @@ -1725,9 +1727,13 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) return -1; fd = FD(leader, cpu_map_idx, thread); - BUG_ON(fd == -1); + BUG_ON(fd == -1 && !leader->skippable); - return fd; + /* + * When the leader has been skipped, return -2 to distinguish from no + * group leader case. + */ + return fd == -1 ? -2 : fd; } static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int thread_idx) @@ -2109,6 +2115,12 @@ retry_open: group_fd = get_group_fd(evsel, idx, thread); + if (group_fd == -2) { + pr_debug("broken group leader for %s\n", evsel->name); + err = -EINVAL; + goto out_close; + } + test_attr__ready(); /* Debug message used by test scripts */ diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index d575390d80bc..0f54f28a69c2 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -95,6 +95,7 @@ struct evsel { bool weak_group; bool bpf_counter; bool use_config_name; + bool skippable; int bpf_fd; struct bpf_object *bpf_obj; struct list_head config_terms; @@ -150,10 +151,8 @@ struct evsel { */ struct bpf_counter_ops *bpf_counter_ops; - union { - struct list_head bpf_counter_list; /* for perf-stat -b */ - struct list_head bpf_filters; /* for perf-record --filter */ - }; + struct list_head bpf_counter_list; /* for perf-stat -b */ + struct list_head bpf_filters; /* for perf-record --filter */ /* for perf-stat --use-bpf */ int bperf_leader_prog_fd; diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 250e444bf032..4ce931cccb63 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -225,7 +225,11 @@ expr: NUMBER { if (fpclassify($3.val) == FP_ZERO) { pr_debug("division by zero\n"); - YYABORT; + assert($3.ids == NULL); + if (compute_ids) + ids__free($1.ids); + $$.val = NAN; + $$.ids = NULL; } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { assert($1.ids == NULL); assert($3.ids == NULL); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c566c6859302..5e9c657dd3f7 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1144,12 +1144,12 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && - (match_metric(pm->metric_group, data->metric_name) || - match_metric(pm->metric_name, data->metric_name))) { + if (pm->metric_expr && match_pm_metric(pm, data->metric_name)) { + bool metric_no_group = data->metric_no_group || + match_metric(data->metric_name, pm->metricgroup_no_group); data->has_match = true; - ret = add_metric(data->list, pm, data->modifier, data->metric_no_group, + ret = add_metric(data->list, pm, data->modifier, metric_no_group, data->metric_no_threshold, data->user_requested_cpu_list, data->system_wide, /*root_metric=*/NULL, /*visited_metrics=*/NULL, table); @@ -1672,7 +1672,7 @@ static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm, { unsigned int *max_level = data; unsigned int level; - const char *p = strstr(pm->metric_group, "TopdownL"); + const char *p = strstr(pm->metric_group ?: "", "TopdownL"); if (!p || p[8] == '\0') return 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d71019dcd614..34ba840ae19a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2140,25 +2140,32 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list int *leader_idx = state; int lhs_leader_idx = *leader_idx, rhs_leader_idx = *leader_idx, ret; const char *lhs_pmu_name, *rhs_pmu_name; + bool lhs_has_group = false, rhs_has_group = false; /* * First sort by grouping/leader. Read the leader idx only if the evsel * is part of a group, as -1 indicates no group. */ - if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) + if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) { + lhs_has_group = true; lhs_leader_idx = lhs_core->leader->idx; - if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) + } + if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) { + rhs_has_group = true; rhs_leader_idx = rhs_core->leader->idx; + } if (lhs_leader_idx != rhs_leader_idx) return lhs_leader_idx - rhs_leader_idx; - /* Group by PMU. Groups can't span PMUs. */ - lhs_pmu_name = evsel__group_pmu_name(lhs); - rhs_pmu_name = evsel__group_pmu_name(rhs); - ret = strcmp(lhs_pmu_name, rhs_pmu_name); - if (ret) - return ret; + /* Group by PMU if there is a group. Groups can't span PMUs. */ + if (lhs_has_group && rhs_has_group) { + lhs_pmu_name = evsel__group_pmu_name(lhs); + rhs_pmu_name = evsel__group_pmu_name(rhs); + ret = strcmp(lhs_pmu_name, rhs_pmu_name); + if (ret) + return ret; + } /* Architecture specific sorting. */ return arch_evlist__cmp(lhs, rhs); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 73b2ff2ddf29..bf5a6c14dfcd 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -431,7 +431,7 @@ static void print_metric_json(struct perf_stat_config *config __maybe_unused, struct outstate *os = ctx; FILE *out = os->fh; - fprintf(out, "\"metric-value\" : %f, ", val); + fprintf(out, "\"metric-value\" : \"%f\", ", val); fprintf(out, "\"metric-unit\" : \"%s\"", unit); if (!config->metric_only) fprintf(out, "}"); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index eeccab6751d7..1566a206ba42 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -403,12 +403,25 @@ static int prepare_metric(struct evsel **metric_events, if (!aggr) break; - /* - * If an event was scaled during stat gathering, reverse - * the scale before computing the metric. - */ - val = aggr->counts.val * (1.0 / metric_events[i]->scale); - source_count = evsel__source_count(metric_events[i]); + if (!metric_events[i]->supported) { + /* + * Not supported events will have a count of 0, + * which can be confusing in a + * metric. Explicitly set the value to NAN. Not + * counted events (enable time of 0) are read as + * 0. + */ + val = NAN; + source_count = 0; + } else { + /* + * If an event was scaled during stat gathering, + * reverse the scale before computing the + * metric. + */ + val = aggr->counts.val * (1.0 / metric_events[i]->scale); + source_count = evsel__source_count(metric_events[i]); + } } n = strdup(evsel__metric_id(metric_events[i])); if (!n) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b2ed9cc52265..63882a4db5c7 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -31,6 +31,13 @@ #include <bfd.h> #endif +#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT) +#ifndef DMGL_PARAMS +#define DMGL_PARAMS (1 << 0) /* Include function args */ +#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ +#endif +#endif + #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ #endif @@ -271,6 +278,26 @@ static bool want_demangle(bool is_kernel_sym) return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; } +/* + * Demangle C++ function signature, typically replaced by demangle-cxx.cpp + * version. + */ +__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, + bool modifiers __maybe_unused) +{ +#ifdef HAVE_LIBBFD_SUPPORT + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return bfd_demangle(NULL, str, flags); +#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return cplus_demangle(str, flags); +#else + return NULL; +#endif +} + static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) { char *demangled = NULL; diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c index 0ce29ee4c2e4..a7a59c6bacda 100644 --- a/tools/power/cpupower/lib/powercap.c +++ b/tools/power/cpupower/lib/powercap.c @@ -40,25 +40,34 @@ static int sysfs_get_enabled(char *path, int *mode) { int fd; char yes_no; + int ret = 0; *mode = 0; fd = open(path, O_RDONLY); - if (fd == -1) - return -1; + if (fd == -1) { + ret = -1; + goto out; + } if (read(fd, &yes_no, 1) != 1) { - close(fd); - return -1; + ret = -1; + goto out_close; } if (yes_no == '1') { *mode = 1; - return 0; + goto out_close; } else if (yes_no == '0') { - return 0; + goto out_close; + } else { + ret = -1; + goto out_close; } - return -1; +out_close: + close(fd); +out: + return ret; } int powercap_get_enabled(int *mode) diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index e7d48cb563c0..ae6af354a81d 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -70,8 +70,8 @@ static int max_freq_mode; */ static unsigned long max_frequency; -static unsigned long long tsc_at_measure_start; -static unsigned long long tsc_at_measure_end; +static unsigned long long *tsc_at_measure_start; +static unsigned long long *tsc_at_measure_end; static unsigned long long *mperf_previous_count; static unsigned long long *aperf_previous_count; static unsigned long long *mperf_current_count; @@ -169,7 +169,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; if (max_freq_mode == MAX_FREQ_TSC_REF) { - tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu]; *percent = 100.0 * mperf_diff / tsc_diff; dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", mperf_cstates[id].name, mperf_diff, tsc_diff); @@ -206,7 +206,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, if (max_freq_mode == MAX_FREQ_TSC_REF) { /* Calculate max_freq from TSC count */ - tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu]; time_diff = timespec_diff_us(time_start, time_end); max_frequency = tsc_diff / time_diff; } @@ -225,33 +225,27 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, static int mperf_start(void) { int cpu; - unsigned long long dbg; clock_gettime(CLOCK_REALTIME, &time_start); - mperf_get_tsc(&tsc_at_measure_start); - for (cpu = 0; cpu < cpu_count; cpu++) + for (cpu = 0; cpu < cpu_count; cpu++) { + mperf_get_tsc(&tsc_at_measure_start[cpu]); mperf_init_stats(cpu); + } - mperf_get_tsc(&dbg); - dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start); return 0; } static int mperf_stop(void) { - unsigned long long dbg; int cpu; - for (cpu = 0; cpu < cpu_count; cpu++) + for (cpu = 0; cpu < cpu_count; cpu++) { mperf_measure_stats(cpu); + mperf_get_tsc(&tsc_at_measure_end[cpu]); + } - mperf_get_tsc(&tsc_at_measure_end); clock_gettime(CLOCK_REALTIME, &time_end); - - mperf_get_tsc(&dbg); - dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); - return 0; } @@ -353,7 +347,8 @@ struct cpuidle_monitor *mperf_register(void) aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long)); mperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); aperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); - + tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long)); + tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long)); mperf_monitor.name_len = strlen(mperf_monitor.name); return &mperf_monitor; } @@ -364,6 +359,8 @@ void mperf_unregister(void) free(aperf_previous_count); free(mperf_current_count); free(aperf_current_count); + free(tsc_at_measure_start); + free(tsc_at_measure_end); free(is_valid); } diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index fba7bec96acd..6f9347ade82c 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -6,6 +6,7 @@ ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=devm_cxl_setup_hdm +ldflags-y += --wrap=devm_cxl_enable_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index ba572d03c687..34b48027b3de 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1256,6 +1256,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + cxlds->media_ready = true; rc = cxl_dev_state_identify(cxlds); if (rc) return rc; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index c4e53f22e421..284416527644 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -19,7 +19,7 @@ void register_cxl_mock_ops(struct cxl_mock_ops *ops) } EXPORT_SYMBOL_GPL(register_cxl_mock_ops); -static DEFINE_SRCU(cxl_mock_srcu); +DEFINE_STATIC_SRCU(cxl_mock_srcu); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) { @@ -149,6 +149,21 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); +int __wrap_devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm) +{ + int index, rc; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = 0; + else + rc = devm_cxl_enable_hdm(port, cxlhdm); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enable_hdm, CXL); + int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) { int rc, index; diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 0a6837f97c32..08adc805878b 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,33 +1,6 @@ -bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 -bpf_cookie/lsm -bpf_cookie/multi_kprobe_attach_api -bpf_cookie/multi_kprobe_link_api -bpf_cookie/trampoline -bpf_loop/check_callback_fn_stop # link unexpected error: -524 -bpf_loop/check_invalid_flags -bpf_loop/check_nested_calls -bpf_loop/check_non_constant_callback -bpf_loop/check_nr_loops -bpf_loop/check_null_callback_ctx -bpf_loop/check_stack -bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) -bpf_tcp_ca/dctcp_fallback -btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 -cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) -d_path/basic # setup attach failed: -524 -deny_namespace # attach unexpected error: -524 (errno 524) -fentry_fexit # fentry_attach unexpected error: -1 (errno 524) -fentry_test # fentry_attach unexpected error: -1 (errno 524) -fexit_sleep # fexit_attach fexit attach failed: -1 -fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 -fexit_test # fexit_attach unexpected error: -1 (errno 524) -get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) -get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) -htab_update/reenter_update -kfree_skb # attach fentry unexpected error: -524 (trampoline) -kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF -kfunc_call/subprog_lskel # skel unexpected error: -2 -kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 +bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 +bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 +fexit_sleep # The test never returns. The remaining tests cannot start. kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 @@ -35,51 +8,5 @@ kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_mu kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) -ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF -ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 -libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) -linked_list -lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) -lru_bug # lru_bug__attach unexpected error: -524 (errno 524) -modify_return # modify_return__attach failed unexpected error: -524 (errno 524) -module_attach # skel_attach skeleton attach failed: -524 -module_fentry_shadow # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0 -mptcp/base # run_test mptcp unexpected error: -524 (errno 524) -netcnt # packets unexpected packets: actual 10001 != expected 10000 -rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 -recursion # skel_attach unexpected error: -524 (errno 524) -ringbuf # skel_attach skeleton attachment failed: -1 -setget_sockopt # attach_cgroup unexpected error: -524 -sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) -skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) -socket_cookie # prog_attach unexpected error: -524 -stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2 -task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) -task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) -test_bprm_opts # attach attach failed: -524 -test_ima # attach attach failed: -524 -test_local_storage # attach lsm attach failed: -524 -test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) -test_overhead # attach_fentry unexpected error: -524 -timer # timer unexpected error: -524 (errno 524) -timer_crash # timer_crash__attach unexpected error: -524 (errno 524) -timer_mim # timer_mim unexpected error: -524 (errno 524) -trace_printk # trace_printk__attach unexpected error: -1 (errno 524) -trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) -tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) -trampoline_count # attach_prog unexpected error: -524 -unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) -user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) -user_ringbuf/test_user_ringbuf_post_producer_wrong_offset -user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz -user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) -user_ringbuf/test_user_ringbuf_sample_full_ring_buffer -user_ringbuf/test_user_ringbuf_post_alignment_autoadjust -user_ringbuf/test_user_ringbuf_overfill -user_ringbuf/test_user_ringbuf_discards_properly_ignored -user_ringbuf/test_user_ringbuf_loop -user_ringbuf/test_user_ringbuf_msg_protocol -user_ringbuf/test_user_ringbuf_blocking_reserve -verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) -vmlinux # skel_attach skeleton attach failed: -524 +kprobe_multi_test/skel_api # libbpf: failed to load BPF skeleton 'kprobe_multi': -3 +module_attach # prog 'kprobe_multi': failed to auto-attach: -95 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index c7463f3ec3c0..5061d9e24c16 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -26,3 +26,4 @@ user_ringbuf # failed to find kernel BTF type ID of verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) xdp_metadata # JIT does not support calling kernel function (kfunc) +test_task_under_cgroup # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c49e5403ad0e..538df8fb8c42 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -88,8 +88,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ xdp_features -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file -TEST_GEN_FILES += liburandom_read.so +TEST_GEN_FILES += liburandom_read.so urandom_read sign-file # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -197,7 +196,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) - $(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \ + $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \ $< -o $@ \ $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8c993ec8ceea..642dda0e758a 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -35,4 +35,10 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, void *buffer, __u32 buffer__szk) __ksym; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; +extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; +extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; +extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; +extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 52785ba671e6..cf216041876c 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -9,6 +9,7 @@ #include <linux/sysfs.h> #include <linux/tracepoint.h> #include "bpf_testmod.h" +#include "bpf_testmod_kfunc.h" #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" @@ -289,8 +290,171 @@ static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .set = &bpf_testmod_common_kfunc_ids, }; +__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) +{ + return a + b + c + d; +} + +__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) +{ + return a + b; +} + +__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) +{ + return sk; +} + +__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) +{ + /* Provoke the compiler to assume that the caller has sign-extended a, + * b and c on platforms where this is required (e.g. s390x). + */ + return (long)a + (long)b + (long)c + d; +} + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, + .cnt = REFCOUNT_INIT(1), +}; + +__bpf_kfunc struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + refcount_inc(&prog_test_struct.cnt); + return &prog_test_struct; +} + +__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) +{ + WARN_ON_ONCE(1); +} + +__bpf_kfunc struct prog_test_member * +bpf_kfunc_call_memb_acquire(void) +{ + WARN_ON_ONCE(1); + return NULL; +} + +__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +{ + WARN_ON_ONCE(1); +} + +static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) +{ + if (size > 2 * sizeof(int)) + return NULL; + + return (int *)p; +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, + const int rdwr_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +/* the next 2 ones can't be really used for testing expect to ensure + * that the verifier rejects the call. + * Acquire functions must return struct pointers, so these ones are + * failing. + */ +__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +{ + /* p != NULL, but p->cnt could be 0 */ +} + +__bpf_kfunc void bpf_kfunc_call_test_destructive(void) +{ +} + +__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) +{ + return arg; +} + BTF_SET8_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test4) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { @@ -312,6 +476,8 @@ static int bpf_testmod_init(void) ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h new file mode 100644 index 000000000000..9693c626646b --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_TESTMOD_KFUNC_H +#define _BPF_TESTMOD_KFUNC_H + +#ifndef __KERNEL__ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#else +#define __ksym +struct prog_test_member1 { + int a; +}; + +struct prog_test_member { + struct prog_test_member1 m; + int c; +}; + +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_member memb; + struct prog_test_ref_kfunc *next; + refcount_t cnt; +}; +#endif + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[]; +}; + +struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; +void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; + +void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; +int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +void bpf_kfunc_call_int_mem_release(int *p) __ksym; + +/* The bpf_kfunc_call_test_static_unused_arg is defined as static, + * but bpf program compilation needs to see it as global symbol. + */ +#ifndef __KERNEL__ +u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#endif + +void bpf_testmod_test_mod_kfunc(int i) __ksym; + +__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, + __u32 c, __u64 d) __ksym; +int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; +struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; +long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; + +void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; +void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; +void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; +void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; + +void bpf_kfunc_call_test_destructive(void) __ksym; + +#endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 596caa176582..a105c0cd008a 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -427,3 +427,26 @@ void close_netns(struct nstoken *token) close(token->orig_netns_fd); free(token); } + +int get_socket_local_port(int sock_fd) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + int err; + + err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); + if (err < 0) + return err; + + if (addr.ss_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&addr; + + return sin->sin_port; + } else if (addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; + + return sin->sin6_port; + } + + return -1; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index f882c691b790..694185644da6 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -56,6 +56,7 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); char *ping_command(int family); +int get_socket_local_port(int sock_fd); struct nstoken; /** diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index b17bfa0e0aac..bb143de68875 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -96,12 +96,80 @@ static void test_parse_test_list(void) goto error; ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name"); ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("t/subtest1,t/subtest2", &set, true), + "parsing"); + if (!ASSERT_EQ(set.cnt, 1, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 2, "subtest filters count")) + goto error; + ASSERT_OK(strcmp("t", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("subtest1", set.tests[0].subtests[0]), "subtest name"); + ASSERT_OK(strcmp("subtest2", set.tests[0].subtests[1]), "subtest name"); error: free_test_filter_set(&set); } +static void test_parse_test_list_file(void) +{ + struct test_filter_set set; + char tmpfile[80]; + FILE *fp; + int fd; + + snprintf(tmpfile, sizeof(tmpfile), "/tmp/bpf_arg_parsing_test.XXXXXX"); + fd = mkstemp(tmpfile); + if (!ASSERT_GE(fd, 0, "create tmp")) + return; + + fp = fdopen(fd, "w"); + if (!ASSERT_NEQ(fp, NULL, "fdopen tmp")) { + close(fd); + goto out_remove; + } + + fprintf(fp, "# comment\n"); + fprintf(fp, " test_with_spaces \n"); + fprintf(fp, "testA/subtest # comment\n"); + fprintf(fp, "testB#comment with no space\n"); + fprintf(fp, "testB # duplicate\n"); + fprintf(fp, "testA/subtest # subtest duplicate\n"); + fprintf(fp, "testA/subtest2\n"); + fprintf(fp, "testC_no_eof_newline"); + fflush(fp); + + if (!ASSERT_OK(ferror(fp), "prepare tmp")) + goto out_fclose; + + init_test_filter_set(&set); + + ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"); + + ASSERT_EQ(set.cnt, 4, "test count"); + ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count"); + ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name"); + ASSERT_EQ(set.tests[1].subtest_cnt, 2, "test 1 subtest count"); + ASSERT_OK(strcmp("subtest", set.tests[1].subtests[0]), "test 1 subtest 0"); + ASSERT_OK(strcmp("subtest2", set.tests[1].subtests[1]), "test 1 subtest 1"); + ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name"); + ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name"); + + free_test_filter_set(&set); + +out_fclose: + fclose(fp); +out_remove: + remove(tmpfile); +} + void test_arg_parsing(void) { if (test__start_subtest("test_parse_test_list")) test_parse_test_list(); + if (test__start_subtest("test_parse_test_list_file")) + test_parse_test_list_file(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index a4d0cc9d3367..fe2c502e5089 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -11,6 +11,7 @@ #include "ksym_race.skel.h" #include "bpf_mod_race.skel.h" #include "kfunc_call_race.skel.h" +#include "testing_helpers.h" /* This test crafts a race between btf_try_get_module and do_init_module, and * checks whether btf_try_get_module handles the invocation for a well-formed @@ -44,35 +45,10 @@ enum bpf_test_state { static _Atomic enum bpf_test_state state = _TS_INVALID; -static int sys_finit_module(int fd, const char *param_values, int flags) -{ - return syscall(__NR_finit_module, fd, param_values, flags); -} - -static int sys_delete_module(const char *name, unsigned int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - -static int load_module(const char *mod) -{ - int ret, fd; - - fd = open("bpf_testmod.ko", O_RDONLY); - if (fd < 0) - return fd; - - ret = sys_finit_module(fd, "", 0); - close(fd); - if (ret < 0) - return ret; - return 0; -} - static void *load_module_thread(void *p) { - if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) + if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail")) atomic_store(&state, TS_MODULE_LOAD); else atomic_store(&state, TS_MODULE_LOAD_FAIL); @@ -124,7 +100,7 @@ static void test_bpf_mod_race_config(const struct test_config *config) if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) return; - if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) + if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod")) goto end_mmap; skel = bpf_mod_race__open(); @@ -202,8 +178,8 @@ end_destroy: bpf_mod_race__destroy(skel); ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); end_module: - sys_delete_module("bpf_testmod", 0); - ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); + unload_bpf_testmod(false); + ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod"); end_mmap: munmap(fault_addr, 4096); atomic_store(&state, _TS_INVALID); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c new file mode 100644 index 000000000000..31f1e815f671 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include <test_progs.h> +#include <bpf/btf.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <linux/mount.h> +#include <sys/syscall.h> + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +__attribute__((unused)) +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned int ms_flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags); +} + +static void bpf_obj_pinning_detached(void) +{ + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + int fs_fd = -1, mnt_fd = -1; + int map_fd = -1, map_fd2 = -1; + int zero = 0, src_value, dst_value, err; + const char *map_name = "fsmount_map"; + + /* A bunch of below UAPI calls are constructed based on reading: + * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html + */ + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + if (!ASSERT_GE(fs_fd, 0, "fs_fd")) + goto cleanup; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (!ASSERT_OK(err, "fs_create")) + goto cleanup; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_GE(mnt_fd, 0, "mnt_fd")) + goto cleanup; + + /* If we wanted to expose detached mount in the file system, we'd do + * something like below. But the whole point is that we actually don't + * even have to expose BPF FS in the file system to be able to work + * (pin/get objects) with it. + * + * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH); + * if (!ASSERT_OK(err, "move_mount")) + * goto cleanup; + */ + + /* create BPF map to pin */ + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); + if (!ASSERT_GE(map_fd, 0, "map_fd")) + goto cleanup; + + /* pin BPF map into detached BPF FS through mnt_fd */ + pin_opts.file_flags = BPF_F_PATH_FD; + pin_opts.path_fd = mnt_fd; + err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts); + if (!ASSERT_OK(err, "map_pin")) + goto cleanup; + + /* get BPF map from detached BPF FS through mnt_fd */ + get_opts.file_flags = BPF_F_PATH_FD; + get_opts.path_fd = mnt_fd; + map_fd2 = bpf_obj_get_opts(map_name, &get_opts); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* update map through one FD */ + src_value = 0xcafebeef; + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq1"); + ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2"); + +cleanup: + if (map_fd >= 0) + ASSERT_OK(close(map_fd), "close_map_fd"); + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + if (fs_fd >= 0) + ASSERT_OK(close(fs_fd), "close_fs_fd"); + if (mnt_fd >= 0) + ASSERT_OK(close(mnt_fd), "close_mnt_fd"); +} + +enum path_kind +{ + PATH_STR_ABS, + PATH_STR_REL, + PATH_FD_REL, +}; + +static void validate_pin(int map_fd, const char *map_name, int src_value, + enum path_kind path_kind) +{ + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; + const char *pin_path = NULL; + int zero = 0, dst_value, map_fd2, err; + + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); + old_cwd[0] = '\0'; + + switch (path_kind) { + case PATH_STR_ABS: + /* absolute path */ + pin_path = abs_path; + break; + case PATH_STR_REL: + /* cwd + relative path */ + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); + pin_path = map_name; + break; + case PATH_FD_REL: + /* dir fd + relative path */ + pin_opts.file_flags = BPF_F_PATH_FD; + pin_opts.path_fd = open("/sys/fs/bpf", O_PATH); + ASSERT_GE(pin_opts.path_fd, 0, "path_fd"); + pin_path = map_name; + break; + } + + /* pin BPF map using specified path definition */ + err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts); + ASSERT_OK(err, "obj_pin"); + + /* cleanup */ + if (pin_opts.path_fd >= 0) + close(pin_opts.path_fd); + if (old_cwd[0]) + ASSERT_OK(chdir(old_cwd), "restore_cwd"); + + map_fd2 = bpf_obj_get(abs_path); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* update map through one FD */ + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq"); +cleanup: + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + unlink(abs_path); +} + +static void validate_get(int map_fd, const char *map_name, int src_value, + enum path_kind path_kind) +{ + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; + const char *pin_path = NULL; + int zero = 0, dst_value, map_fd2, err; + + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); + /* pin BPF map using specified path definition */ + err = bpf_obj_pin(map_fd, abs_path); + if (!ASSERT_OK(err, "pin_map")) + return; + + old_cwd[0] = '\0'; + + switch (path_kind) { + case PATH_STR_ABS: + /* absolute path */ + pin_path = abs_path; + break; + case PATH_STR_REL: + /* cwd + relative path */ + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); + pin_path = map_name; + break; + case PATH_FD_REL: + /* dir fd + relative path */ + get_opts.file_flags = BPF_F_PATH_FD; + get_opts.path_fd = open("/sys/fs/bpf", O_PATH); + ASSERT_GE(get_opts.path_fd, 0, "path_fd"); + pin_path = map_name; + break; + } + + map_fd2 = bpf_obj_get_opts(pin_path, &get_opts); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* cleanup */ + if (get_opts.path_fd >= 0) + close(get_opts.path_fd); + if (old_cwd[0]) + ASSERT_OK(chdir(old_cwd), "restore_cwd"); + + /* update map through one FD */ + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq"); +cleanup: + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + unlink(abs_path); +} + +static void bpf_obj_pinning_mounted(enum path_kind path_kind) +{ + const char *map_name = "mounted_map"; + int map_fd; + + /* create BPF map to pin */ + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); + if (!ASSERT_GE(map_fd, 0, "map_fd")) + return; + + validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind); + validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind); + ASSERT_OK(close(map_fd), "close_map_fd"); +} + +void test_bpf_obj_pinning() +{ + if (test__start_subtest("detached")) + bpf_obj_pinning_detached(); + if (test__start_subtest("mounted-str-abs")) + bpf_obj_pinning_mounted(PATH_STR_ABS); + if (test__start_subtest("mounted-str-rel")) + bpf_obj_pinning_mounted(PATH_STR_REL); + if (test__start_subtest("mounted-fd-rel")) + bpf_obj_pinning_mounted(PATH_FD_REL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c index 4d2fa99273d8..2bb5773d6f99 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c @@ -25,6 +25,8 @@ static void test_setsockopt_set(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, assert that * we actually get that error when we run setsockopt() */ @@ -59,6 +61,8 @@ static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, and one that gets the * previously set errno. Assert that we get the same errno back. */ @@ -100,6 +104,8 @@ static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that gets the previously set errno. * Assert that, without anything setting one, we get 0. */ @@ -134,6 +140,8 @@ static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that gets the previously set errno, and then * one that sets the errno to EUNATCH. Assert that the get does not * see EUNATCH set later, and does not prevent EUNATCH from being set. @@ -177,6 +185,8 @@ static void test_setsockopt_override(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN, * and then one that gets the exported errno. Assert both the syscall * and the helper sees the last set errno. @@ -224,6 +234,8 @@ static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that return a reject without setting errno * (legacy reject), and one that gets the errno. Assert that for * backward compatibility the syscall result in EPERM, and this @@ -268,6 +280,8 @@ static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, then one that return a reject * without setting errno, and then one that gets the exported errno. * Assert both the syscall and the helper's errno are unaffected by @@ -319,6 +333,8 @@ static void test_getsockopt_get(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach getsockopt that gets previously set errno. Assert that the * error from kernel is in both ctx_retval_value and retval_value. */ @@ -359,6 +375,8 @@ static void test_getsockopt_override(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach getsockopt that sets retval to -EISCONN. Assert that this * overrides the value from kernel. */ @@ -396,6 +414,8 @@ static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach getsockopt that sets retval to -EISCONN, and one that clears * ctx retval. Assert that the clearing ctx retval is synced to helper * and clears any errors both from kernel and BPF.. diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index d176c34a7d2e..7cfac53c0d58 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -20,6 +20,14 @@ static struct { {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_adjust", SETUP_SYSCALL_SLEEP}, + {"test_adjust_err", SETUP_SYSCALL_SLEEP}, + {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_is_null", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_is_rdonly", SETUP_SKB_PROG}, + {"test_dynptr_clone", SETUP_SKB_PROG}, + {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, + {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, }; static void verify_success(const char *prog_name, enum test_setup_type setup_type) diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c new file mode 100644 index 000000000000..fd41425d2e5c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <errno.h> +#include <sys/syscall.h> +#include <unistd.h> +#include "test_global_map_resize.skel.h" +#include "test_progs.h" + +static void run_prog_bss_array_sum(void) +{ + (void)syscall(__NR_getpid); +} + +static void run_prog_data_array_sum(void) +{ + (void)syscall(__NR_getuid); +} + +static void global_map_resize_bss_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2; + size_t array_len, actual_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + goto teardown; + + /* set some initial value before resizing. + * it is expected this non-zero value will be preserved + * while resizing. + */ + skel->bss->array[0] = 1; + + /* resize map value and verify the new size */ + map = skel->maps.bss; + err = bpf_map__set_value_size(map, desired_sz); + if (!ASSERT_OK(err, "bpf_map__set_value_size")) + goto teardown; + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) + goto teardown; + + /* set the expected number of elements based on the resized array */ + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]); + if (!ASSERT_GT(array_len, 1, "array_len")) + goto teardown; + + skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz); + if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)")) + goto teardown; + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) + goto teardown; + + /* fill the newly resized array with ones, + * skipping the first element which was previously set + */ + for (int i = 1; i < array_len; i++) + skel->bss->array[i] = 1; + + /* set global const values before loading */ + skel->rodata->pid = getpid(); + skel->rodata->bss_array_len = array_len; + skel->rodata->data_array_len = 1; + + err = test_global_map_resize__load(skel); + if (!ASSERT_OK(err, "test_global_map_resize__load")) + goto teardown; + err = test_global_map_resize__attach(skel); + if (!ASSERT_OK(err, "test_global_map_resize__attach")) + goto teardown; + + /* run the bpf program which will sum the contents of the array. + * since the array was filled with ones,verify the sum equals array_len + */ + run_prog_bss_array_sum(); + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +static void global_map_resize_data_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2; + size_t array_len, actual_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + goto teardown; + + /* set some initial value before resizing. + * it is expected this non-zero value will be preserved + * while resizing. + */ + skel->data_custom->my_array[0] = 1; + + /* resize map value and verify the new size */ + map = skel->maps.data_custom; + err = bpf_map__set_value_size(map, desired_sz); + if (!ASSERT_OK(err, "bpf_map__set_value_size")) + goto teardown; + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) + goto teardown; + + /* set the expected number of elements based on the resized array */ + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]); + if (!ASSERT_GT(array_len, 1, "array_len")) + goto teardown; + + skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz); + if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)")) + goto teardown; + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) + goto teardown; + + /* fill the newly resized array with ones, + * skipping the first element which was previously set + */ + for (int i = 1; i < array_len; i++) + skel->data_custom->my_array[i] = 1; + + /* set global const values before loading */ + skel->rodata->pid = getpid(); + skel->rodata->bss_array_len = 1; + skel->rodata->data_array_len = array_len; + + err = test_global_map_resize__load(skel); + if (!ASSERT_OK(err, "test_global_map_resize__load")) + goto teardown; + err = test_global_map_resize__attach(skel); + if (!ASSERT_OK(err, "test_global_map_resize__attach")) + goto teardown; + + /* run the bpf program which will sum the contents of the array. + * since the array was filled with ones,verify the sum equals array_len + */ + run_prog_data_array_sum(); + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +static void global_map_resize_invalid_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + __u32 element_sz, desired_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + return; + + /* attempt to resize a global datasec map to size + * which does NOT align with array + */ + map = skel->maps.data_custom; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf")) + goto teardown; + /* set desired size a fraction of element size beyond an aligned size */ + element_sz = sizeof(skel->data_custom->my_array[0]); + desired_sz = element_sz + element_sz / 2; + /* confirm desired size does NOT align with array */ + if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment")) + goto teardown; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val")) + goto teardown; + + /* attempt to resize a global datasec map whose only var is NOT an array */ + map = skel->maps.data_non_array; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf")) + goto teardown; + /* set desired size to arbitrary value */ + desired_sz = 1024; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val")) + goto teardown; + + /* attempt to resize a global datasec map + * whose last var is NOT an array + */ + map = skel->maps.data_array_not_last; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf")) + goto teardown; + /* set desired size to a multiple of element size */ + element_sz = sizeof(skel->data_array_not_last->my_array_first[0]); + desired_sz = element_sz * 8; + /* confirm desired size aligns with array */ + if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment")) + goto teardown; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +void test_global_map_resize(void) +{ + if (test__start_subtest("global_map_resize_bss")) + global_map_resize_bss_subtest(); + + if (test__start_subtest("global_map_resize_data")) + global_map_resize_data_subtest(); + + if (test__start_subtest("global_map_resize_invalid")) + global_map_resize_invalid_subtest(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c new file mode 100644 index 000000000000..9ab4cd195108 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> + +#include "inner_array_lookup.skel.h" + +void test_inner_array_lookup(void) +{ + int map1_fd, err; + int key = 3; + int val = 1; + struct inner_array_lookup *skel; + + skel = inner_array_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load_skeleton")) + return; + + err = inner_array_lookup__attach(skel); + if (!ASSERT_OK(err, "skeleton_attach")) + goto cleanup; + + map1_fd = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(map1_fd, &key, &val, 0); + + /* Probe should have set the element at index 3 to 2 */ + bpf_map_lookup_elem(map1_fd, &key, &val); + ASSERT_EQ(val, 2, "value_is_2"); + +cleanup: + inner_array_lookup__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 7fc01ff490db..f53d658ed080 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -4,6 +4,7 @@ #include <test_progs.h> #include <stdbool.h> #include "test_module_attach.skel.h" +#include "testing_helpers.h" static int duration; @@ -32,11 +33,6 @@ static int trigger_module_test_writable(int *val) return 0; } -static int delete_module(const char *name, int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - void test_module_attach(void) { const int READ_SZ = 456; @@ -93,21 +89,21 @@ void test_module_attach(void) if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); link = bpf_program__attach(skel->progs.handle_fexit); if (!ASSERT_OK_PTR(link, "attach_fexit")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); link = bpf_program__attach(skel->progs.kprobe_multi); if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index d3915c58d0e1..c3333edd029f 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -67,12 +67,12 @@ void serial_test_netcnt(void) } /* No packets should be lost */ - ASSERT_EQ(packets, 10000, "packets"); + ASSERT_GE(packets, 10000, "packets"); /* Let's check that bytes counter matches the number of packets * multiplied by the size of ipv6 ICMP packet. */ - ASSERT_EQ(bytes, packets * 104, "bytes"); + ASSERT_GE(bytes, packets * 104, "bytes"); err: if (cg_fd != -1) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c new file mode 100644 index 000000000000..b0583309a94e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/bpf_endian.h> + +#include "sock_destroy_prog.skel.h" +#include "sock_destroy_prog_fail.skel.h" +#include "network_helpers.h" + +#define TEST_NS "sock_destroy_netns" + +static void start_iter_sockets(struct bpf_program *prog) +{ + struct bpf_link *link; + char buf[50] = {}; + int iter_fd, len; + + link = bpf_program__attach_iter(prog, NULL); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "create_iter")) + goto free_link; + + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + ASSERT_GE(len, 0, "read"); + + close(iter_fd); + +free_link: + bpf_link__destroy(link); +} + +static void test_tcp_client(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, accept_serv = -1, n; + + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + accept_serv = accept(serv, NULL, NULL); + if (!ASSERT_GE(accept_serv, 0, "serv accept")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys connected client sockets. */ + start_iter_sockets(skel->progs.iter_tcp6_client); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"); + +cleanup: + if (clien != -1) + close(clien); + if (accept_serv != -1) + close(accept_serv); + if (serv != -1) + close(serv); +} + +static void test_tcp_server(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, accept_serv = -1, n, serv_port; + + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + serv_port = get_socket_local_port(serv); + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) + goto cleanup; + skel->bss->serv_port = (__be16) serv_port; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + accept_serv = accept(serv, NULL, NULL); + if (!ASSERT_GE(accept_serv, 0, "serv accept")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys server sockets. */ + start_iter_sockets(skel->progs.iter_tcp6_server); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket"); + +cleanup: + if (clien != -1) + close(clien); + if (accept_serv != -1) + close(accept_serv); + if (serv != -1) + close(serv); +} + +static void test_udp_client(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, n = 0; + + serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys sockets. */ + start_iter_sockets(skel->progs.iter_udp6_client); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + /* UDP sockets have an overriding error code after they are disconnected, + * so we don't check for ECONNABORTED error code. + */ + +cleanup: + if (clien != -1) + close(clien); + if (serv != -1) + close(serv); +} + +static void test_udp_server(struct sock_destroy_prog *skel) +{ + int *listen_fds = NULL, n, i, serv_port; + unsigned int num_listens = 5; + char buf[1]; + + /* Start reuseport servers. */ + listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM, + "::1", 0, 0, num_listens); + if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server")) + goto cleanup; + serv_port = get_socket_local_port(listen_fds[0]); + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) + goto cleanup; + skel->bss->serv_port = (__be16) serv_port; + + /* Run iterator program that destroys server sockets. */ + start_iter_sockets(skel->progs.iter_udp6_server); + + for (i = 0; i < num_listens; ++i) { + n = read(listen_fds[i], buf, sizeof(buf)); + if (!ASSERT_EQ(n, -1, "read") || + !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket")) + break; + } + ASSERT_EQ(i, num_listens, "server socket"); + +cleanup: + free_fds(listen_fds, num_listens); +} + +void test_sock_destroy(void) +{ + struct sock_destroy_prog *skel; + struct nstoken *nstoken = NULL; + int cgroup_fd; + + skel = sock_destroy_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + cgroup_fd = test__join_cgroup("/sock_destroy"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto cleanup; + + skel->links.sock_connect = bpf_program__attach_cgroup( + skel->progs.sock_connect, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach")) + goto cleanup; + + SYS(cleanup, "ip netns add %s", TEST_NS); + SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS); + + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto cleanup; + + if (test__start_subtest("tcp_client")) + test_tcp_client(skel); + if (test__start_subtest("tcp_server")) + test_tcp_server(skel); + if (test__start_subtest("udp_client")) + test_udp_client(skel); + if (test__start_subtest("udp_server")) + test_udp_server(skel); + + RUN_TESTS(sock_destroy_prog_fail); + +cleanup: + if (nstoken) + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + if (cgroup_fd >= 0) + close(cgroup_fd); + sock_destroy_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 0ce25a967481..064cc5e8d9ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -2,6 +2,7 @@ // Copyright (c) 2020 Cloudflare #include <error.h> #include <netinet/tcp.h> +#include <sys/epoll.h> #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" @@ -9,8 +10,12 @@ #include "test_sockmap_invalid_update.skel.h" #include "test_sockmap_skb_verdict_attach.skel.h" #include "test_sockmap_progs_query.skel.h" +#include "test_sockmap_pass_prog.skel.h" +#include "test_sockmap_drop_prog.skel.h" #include "bpf_iter_sockmap.skel.h" +#include "sockmap_helpers.h" + #define TCP_REPAIR 19 /* TCP sock is under repair right now */ #define TCP_REPAIR_ON 1 @@ -350,6 +355,126 @@ out: test_sockmap_progs_query__destroy(skel); } +#define MAX_EVENTS 10 +static void test_sockmap_skb_verdict_shutdown(void) +{ + struct epoll_event ev, events[MAX_EVENTS]; + int n, err, map, verdict, s, c1, p1; + struct test_sockmap_pass_prog *skel; + int epollfd; + int zero = 0; + char b; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + map = bpf_map__fd(skel->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + s = socket_loopback(AF_INET, SOCK_STREAM); + if (s < 0) + goto out; + err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); + if (err < 0) + goto out; + + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (err < 0) + goto out_close; + + shutdown(p1, SHUT_WR); + + ev.events = EPOLLIN; + ev.data.fd = c1; + + epollfd = epoll_create1(0); + if (!ASSERT_GT(epollfd, -1, "epoll_create(0)")) + goto out_close; + err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev); + if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)")) + goto out_close; + err = epoll_wait(epollfd, events, MAX_EVENTS, -1); + if (!ASSERT_EQ(err, 1, "epoll_wait(fd)")) + goto out_close; + + n = recv(c1, &b, 1, SOCK_NONBLOCK); + ASSERT_EQ(n, 0, "recv_timeout(fin)"); +out_close: + close(c1); + close(p1); +out: + test_sockmap_pass_prog__destroy(skel); +} + +static void test_sockmap_skb_verdict_fionread(bool pass_prog) +{ + int expected, zero = 0, sent, recvd, avail; + int err, map, verdict, s, c0, c1, p0, p1; + struct test_sockmap_pass_prog *pass; + struct test_sockmap_drop_prog *drop; + char buf[256] = "0123456789"; + + if (pass_prog) { + pass = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(pass, "open_and_load")) + return; + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + map = bpf_map__fd(pass->maps.sock_map_rx); + expected = sizeof(buf); + } else { + drop = test_sockmap_drop_prog__open_and_load(); + if (!ASSERT_OK_PTR(drop, "open_and_load")) + return; + verdict = bpf_program__fd(drop->progs.prog_skb_verdict); + map = bpf_map__fd(drop->maps.sock_map_rx); + /* On drop data is consumed immediately and copied_seq inc'd */ + expected = 0; + } + + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + s = socket_loopback(AF_INET, SOCK_STREAM); + if (!ASSERT_GT(s, -1, "socket_loopback(s)")) + goto out; + err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs(s)")) + goto out; + + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + + sent = xsend(p1, &buf, sizeof(buf), 0); + ASSERT_EQ(sent, sizeof(buf), "xsend(p0)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, expected, "ioctl(FIONREAD)"); + /* On DROP test there will be no data to read */ + if (pass_prog) { + recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC); + ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)"); + } + +out_close: + close(c0); + close(p0); + close(c1); + close(p1); +out: + if (pass_prog) + test_sockmap_pass_prog__destroy(pass); + else + test_sockmap_drop_prog__destroy(drop); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -384,4 +509,10 @@ void test_sockmap_basic(void) test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT); if (test__start_subtest("sockmap skb_verdict progs query")) test_sockmap_progs_query(BPF_SK_SKB_VERDICT); + if (test__start_subtest("sockmap skb_verdict shutdown")) + test_sockmap_skb_verdict_shutdown(); + if (test__start_subtest("sockmap skb_verdict fionread")) + test_sockmap_skb_verdict_fionread(true); + if (test__start_subtest("sockmap skb_verdict fionread on drop")) + test_sockmap_skb_verdict_fionread(false); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h new file mode 100644 index 000000000000..d12665490a90 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -0,0 +1,390 @@ +#ifndef __SOCKMAP_HELPERS__ +#define __SOCKMAP_HELPERS__ + +#include <linux/vm_sockets.h> + +#define IO_TIMEOUT_SEC 30 +#define MAX_STRERR_LEN 256 +#define MAX_TEST_NAME 80 + +/* workaround for older vm_sockets.h */ +#ifndef VMADDR_CID_LOCAL +#define VMADDR_CID_LOCAL 1 +#endif + +#define __always_unused __attribute__((__unused__)) + +#define _FAIL(errnum, fmt...) \ + ({ \ + error_at_line(0, (errnum), __func__, __LINE__, fmt); \ + CHECK_FAIL(true); \ + }) +#define FAIL(fmt...) _FAIL(0, fmt) +#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) +#define FAIL_LIBBPF(err, msg) \ + ({ \ + char __buf[MAX_STRERR_LEN]; \ + libbpf_strerror((err), __buf, sizeof(__buf)); \ + FAIL("%s: %s", (msg), __buf); \ + }) + +/* Wrappers that fail the test on error and report it. */ + +#define xaccept_nonblock(fd, addr, len) \ + ({ \ + int __ret = \ + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("accept"); \ + __ret; \ + }) + +#define xbind(fd, addr, len) \ + ({ \ + int __ret = bind((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("bind"); \ + __ret; \ + }) + +#define xclose(fd) \ + ({ \ + int __ret = close((fd)); \ + if (__ret == -1) \ + FAIL_ERRNO("close"); \ + __ret; \ + }) + +#define xconnect(fd, addr, len) \ + ({ \ + int __ret = connect((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("connect"); \ + __ret; \ + }) + +#define xgetsockname(fd, addr, len) \ + ({ \ + int __ret = getsockname((fd), (addr), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockname"); \ + __ret; \ + }) + +#define xgetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = getsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("getsockopt(" #name ")"); \ + __ret; \ + }) + +#define xlisten(fd, backlog) \ + ({ \ + int __ret = listen((fd), (backlog)); \ + if (__ret == -1) \ + FAIL_ERRNO("listen"); \ + __ret; \ + }) + +#define xsetsockopt(fd, level, name, val, len) \ + ({ \ + int __ret = setsockopt((fd), (level), (name), (val), (len)); \ + if (__ret == -1) \ + FAIL_ERRNO("setsockopt(" #name ")"); \ + __ret; \ + }) + +#define xsend(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = send((fd), (buf), (len), (flags)); \ + if (__ret == -1) \ + FAIL_ERRNO("send"); \ + __ret; \ + }) + +#define xrecv_nonblock(fd, buf, len, flags) \ + ({ \ + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ + IO_TIMEOUT_SEC); \ + if (__ret == -1) \ + FAIL_ERRNO("recv"); \ + __ret; \ + }) + +#define xsocket(family, sotype, flags) \ + ({ \ + int __ret = socket(family, sotype, flags); \ + if (__ret == -1) \ + FAIL_ERRNO("socket"); \ + __ret; \ + }) + +#define xbpf_map_delete_elem(fd, key) \ + ({ \ + int __ret = bpf_map_delete_elem((fd), (key)); \ + if (__ret < 0) \ + FAIL_ERRNO("map_delete"); \ + __ret; \ + }) + +#define xbpf_map_lookup_elem(fd, key, val) \ + ({ \ + int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ + if (__ret < 0) \ + FAIL_ERRNO("map_lookup"); \ + __ret; \ + }) + +#define xbpf_map_update_elem(fd, key, val, flags) \ + ({ \ + int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ + if (__ret < 0) \ + FAIL_ERRNO("map_update"); \ + __ret; \ + }) + +#define xbpf_prog_attach(prog, target, type, flags) \ + ({ \ + int __ret = \ + bpf_prog_attach((prog), (target), (type), (flags)); \ + if (__ret < 0) \ + FAIL_ERRNO("prog_attach(" #type ")"); \ + __ret; \ + }) + +#define xbpf_prog_detach2(prog, target, type) \ + ({ \ + int __ret = bpf_prog_detach2((prog), (target), (type)); \ + if (__ret < 0) \ + FAIL_ERRNO("prog_detach2(" #type ")"); \ + __ret; \ + }) + +#define xpthread_create(thread, attr, func, arg) \ + ({ \ + int __ret = pthread_create((thread), (attr), (func), (arg)); \ + errno = __ret; \ + if (__ret) \ + FAIL_ERRNO("pthread_create"); \ + __ret; \ + }) + +#define xpthread_join(thread, retval) \ + ({ \ + int __ret = pthread_join((thread), (retval)); \ + errno = __ret; \ + if (__ret) \ + FAIL_ERRNO("pthread_join"); \ + __ret; \ + }) + +static inline int poll_read(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set rfds; + int r; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + r = select(fd + 1, &rfds, NULL, NULL, &timeout); + if (r == 0) + errno = ETIME; + + return r == 1 ? 0 : -1; +} + +static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return accept(fd, addr, len); +} + +static inline int recv_timeout(int fd, void *buf, size_t len, int flags, + unsigned int timeout_sec) +{ + if (poll_read(fd, timeout_sec)) + return -1; + + return recv(fd, buf, len, flags); +} + +static inline void init_addr_loopback4(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); + + addr4->sin_family = AF_INET; + addr4->sin_port = 0; + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + *len = sizeof(*addr4); +} + +static inline void init_addr_loopback6(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); + + addr6->sin6_family = AF_INET6; + addr6->sin6_port = 0; + addr6->sin6_addr = in6addr_loopback; + *len = sizeof(*addr6); +} + +static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss, + socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + +static inline void init_addr_loopback(int family, struct sockaddr_storage *ss, + socklen_t *len) +{ + switch (family) { + case AF_INET: + init_addr_loopback4(ss, len); + return; + case AF_INET6: + init_addr_loopback6(ss, len); + return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; + default: + FAIL("unsupported address family %d", family); + } +} + +static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) +{ + return (struct sockaddr *)ss; +} + +static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) +{ + u64 value; + u32 key; + int err; + + key = 0; + value = fd1; + err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); + if (err) + return err; + + key = 1; + value = fd2; + return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); +} + +static inline int create_pair(int s, int family, int sotype, int *c, int *p) +{ + struct sockaddr_storage addr; + socklen_t len; + int err = 0; + + len = sizeof(addr); + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + *c = xsocket(family, sotype, 0); + if (*c < 0) + return errno; + err = xconnect(*c, sockaddr(&addr), len); + if (err) { + err = errno; + goto close_cli0; + } + + *p = xaccept_nonblock(s, NULL, NULL); + if (*p < 0) { + err = errno; + goto close_cli0; + } + return err; +close_cli0: + close(*c); + return err; +} + +static inline int create_socket_pairs(int s, int family, int sotype, + int *c0, int *c1, int *p0, int *p1) +{ + int err; + + err = create_pair(s, family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(s, family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + return err; +} + +static inline int enable_reuseport(int s, int progfd) +{ + int err, one = 1; + + err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); + if (err) + return -1; + err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, + sizeof(progfd)); + if (err) + return -1; + + return 0; +} + +static inline int socket_loopback_reuseport(int family, int sotype, int progfd) +{ + struct sockaddr_storage addr; + socklen_t len; + int err, s; + + init_addr_loopback(family, &addr, &len); + + s = xsocket(family, sotype, 0); + if (s == -1) + return -1; + + if (progfd >= 0) + enable_reuseport(s, progfd); + + err = xbind(s, sockaddr(&addr), len); + if (err) + goto close; + + if (sotype & SOCK_DGRAM) + return s; + + err = xlisten(s, SOMAXCONN); + if (err) + goto close; + + return s; +close: + xclose(s); + return -1; +} + +static inline int socket_loopback(int family, int sotype) +{ + return socket_loopback_reuseport(family, sotype, -1); +} + + +#endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 141c1e5944ee..b4f6f3a50ae5 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -20,11 +20,6 @@ #include <unistd.h> #include <linux/vm_sockets.h> -/* workaround for older vm_sockets.h */ -#ifndef VMADDR_CID_LOCAL -#define VMADDR_CID_LOCAL 1 -#endif - #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -32,315 +27,7 @@ #include "test_progs.h" #include "test_sockmap_listen.skel.h" -#define IO_TIMEOUT_SEC 30 -#define MAX_STRERR_LEN 256 -#define MAX_TEST_NAME 80 - -#define __always_unused __attribute__((__unused__)) - -#define _FAIL(errnum, fmt...) \ - ({ \ - error_at_line(0, (errnum), __func__, __LINE__, fmt); \ - CHECK_FAIL(true); \ - }) -#define FAIL(fmt...) _FAIL(0, fmt) -#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt) -#define FAIL_LIBBPF(err, msg) \ - ({ \ - char __buf[MAX_STRERR_LEN]; \ - libbpf_strerror((err), __buf, sizeof(__buf)); \ - FAIL("%s: %s", (msg), __buf); \ - }) - -/* Wrappers that fail the test on error and report it. */ - -#define xaccept_nonblock(fd, addr, len) \ - ({ \ - int __ret = \ - accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("accept"); \ - __ret; \ - }) - -#define xbind(fd, addr, len) \ - ({ \ - int __ret = bind((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("bind"); \ - __ret; \ - }) - -#define xclose(fd) \ - ({ \ - int __ret = close((fd)); \ - if (__ret == -1) \ - FAIL_ERRNO("close"); \ - __ret; \ - }) - -#define xconnect(fd, addr, len) \ - ({ \ - int __ret = connect((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("connect"); \ - __ret; \ - }) - -#define xgetsockname(fd, addr, len) \ - ({ \ - int __ret = getsockname((fd), (addr), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockname"); \ - __ret; \ - }) - -#define xgetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = getsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("getsockopt(" #name ")"); \ - __ret; \ - }) - -#define xlisten(fd, backlog) \ - ({ \ - int __ret = listen((fd), (backlog)); \ - if (__ret == -1) \ - FAIL_ERRNO("listen"); \ - __ret; \ - }) - -#define xsetsockopt(fd, level, name, val, len) \ - ({ \ - int __ret = setsockopt((fd), (level), (name), (val), (len)); \ - if (__ret == -1) \ - FAIL_ERRNO("setsockopt(" #name ")"); \ - __ret; \ - }) - -#define xsend(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = send((fd), (buf), (len), (flags)); \ - if (__ret == -1) \ - FAIL_ERRNO("send"); \ - __ret; \ - }) - -#define xrecv_nonblock(fd, buf, len, flags) \ - ({ \ - ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ - IO_TIMEOUT_SEC); \ - if (__ret == -1) \ - FAIL_ERRNO("recv"); \ - __ret; \ - }) - -#define xsocket(family, sotype, flags) \ - ({ \ - int __ret = socket(family, sotype, flags); \ - if (__ret == -1) \ - FAIL_ERRNO("socket"); \ - __ret; \ - }) - -#define xbpf_map_delete_elem(fd, key) \ - ({ \ - int __ret = bpf_map_delete_elem((fd), (key)); \ - if (__ret < 0) \ - FAIL_ERRNO("map_delete"); \ - __ret; \ - }) - -#define xbpf_map_lookup_elem(fd, key, val) \ - ({ \ - int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ - if (__ret < 0) \ - FAIL_ERRNO("map_lookup"); \ - __ret; \ - }) - -#define xbpf_map_update_elem(fd, key, val, flags) \ - ({ \ - int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ - if (__ret < 0) \ - FAIL_ERRNO("map_update"); \ - __ret; \ - }) - -#define xbpf_prog_attach(prog, target, type, flags) \ - ({ \ - int __ret = \ - bpf_prog_attach((prog), (target), (type), (flags)); \ - if (__ret < 0) \ - FAIL_ERRNO("prog_attach(" #type ")"); \ - __ret; \ - }) - -#define xbpf_prog_detach2(prog, target, type) \ - ({ \ - int __ret = bpf_prog_detach2((prog), (target), (type)); \ - if (__ret < 0) \ - FAIL_ERRNO("prog_detach2(" #type ")"); \ - __ret; \ - }) - -#define xpthread_create(thread, attr, func, arg) \ - ({ \ - int __ret = pthread_create((thread), (attr), (func), (arg)); \ - errno = __ret; \ - if (__ret) \ - FAIL_ERRNO("pthread_create"); \ - __ret; \ - }) - -#define xpthread_join(thread, retval) \ - ({ \ - int __ret = pthread_join((thread), (retval)); \ - errno = __ret; \ - if (__ret) \ - FAIL_ERRNO("pthread_join"); \ - __ret; \ - }) - -static int poll_read(int fd, unsigned int timeout_sec) -{ - struct timeval timeout = { .tv_sec = timeout_sec }; - fd_set rfds; - int r; - - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - - r = select(fd + 1, &rfds, NULL, NULL, &timeout); - if (r == 0) - errno = ETIME; - - return r == 1 ? 0 : -1; -} - -static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return accept(fd, addr, len); -} - -static int recv_timeout(int fd, void *buf, size_t len, int flags, - unsigned int timeout_sec) -{ - if (poll_read(fd, timeout_sec)) - return -1; - - return recv(fd, buf, len, flags); -} - -static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) -{ - struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); - - addr4->sin_family = AF_INET; - addr4->sin_port = 0; - addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); - *len = sizeof(*addr4); -} - -static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len) -{ - struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss)); - - addr6->sin6_family = AF_INET6; - addr6->sin6_port = 0; - addr6->sin6_addr = in6addr_loopback; - *len = sizeof(*addr6); -} - -static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len) -{ - struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); - - addr->svm_family = AF_VSOCK; - addr->svm_port = VMADDR_PORT_ANY; - addr->svm_cid = VMADDR_CID_LOCAL; - *len = sizeof(*addr); -} - -static void init_addr_loopback(int family, struct sockaddr_storage *ss, - socklen_t *len) -{ - switch (family) { - case AF_INET: - init_addr_loopback4(ss, len); - return; - case AF_INET6: - init_addr_loopback6(ss, len); - return; - case AF_VSOCK: - init_addr_loopback_vsock(ss, len); - return; - default: - FAIL("unsupported address family %d", family); - } -} - -static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss) -{ - return (struct sockaddr *)ss; -} - -static int enable_reuseport(int s, int progfd) -{ - int err, one = 1; - - err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); - if (err) - return -1; - err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd, - sizeof(progfd)); - if (err) - return -1; - - return 0; -} - -static int socket_loopback_reuseport(int family, int sotype, int progfd) -{ - struct sockaddr_storage addr; - socklen_t len; - int err, s; - - init_addr_loopback(family, &addr, &len); - - s = xsocket(family, sotype, 0); - if (s == -1) - return -1; - - if (progfd >= 0) - enable_reuseport(s, progfd); - - err = xbind(s, sockaddr(&addr), len); - if (err) - goto close; - - if (sotype & SOCK_DGRAM) - return s; - - err = xlisten(s, SOMAXCONN); - if (err) - goto close; - - return s; -close: - xclose(s); - return -1; -} - -static int socket_loopback(int family, int sotype) -{ - return socket_loopback_reuseport(family, sotype, -1); -} +#include "sockmap_helpers.h" static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused, int family, int sotype, int mapfd) @@ -984,31 +671,12 @@ static const char *redir_mode_str(enum redir_mode mode) } } -static int add_to_sockmap(int sock_mapfd, int fd1, int fd2) -{ - u64 value; - u32 key; - int err; - - key = 0; - value = fd1; - err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); - if (err) - return err; - - key = 1; - value = fd2; - return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); -} - static void redir_to_connected(int family, int sotype, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); - struct sockaddr_storage addr; int s, c0, c1, p0, p1; unsigned int pass; - socklen_t len; int err, n; u32 key; char b; @@ -1019,36 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (s < 0) return; - len = sizeof(addr); - err = xgetsockname(s, sockaddr(&addr), &len); + err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); if (err) goto close_srv; - c0 = xsocket(family, sotype, 0); - if (c0 < 0) - goto close_srv; - err = xconnect(c0, sockaddr(&addr), len); - if (err) - goto close_cli0; - - p0 = xaccept_nonblock(s, NULL, NULL); - if (p0 < 0) - goto close_cli0; - - c1 = xsocket(family, sotype, 0); - if (c1 < 0) - goto close_peer0; - err = xconnect(c1, sockaddr(&addr), len); - if (err) - goto close_cli1; - - p1 = xaccept_nonblock(s, NULL, NULL); - if (p1 < 0) - goto close_cli1; - err = add_to_sockmap(sock_mapfd, p0, p1); if (err) - goto close_peer1; + goto close; n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1); if (n < 0) @@ -1056,12 +701,12 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (n == 0) FAIL("%s: incomplete write", log_prefix); if (n < 1) - goto close_peer1; + goto close; key = SK_PASS; err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); if (err) - goto close_peer1; + goto close; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); @@ -1070,13 +715,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, if (n == 0) FAIL("%s: incomplete recv", log_prefix); -close_peer1: +close: xclose(p1); -close_cli1: xclose(c1); -close_peer0: xclose(p0); -close_cli0: xclose(c0); close_srv: xclose(s); diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index aa4debf62fc6..9e6a5e3ed4de 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -5,10 +5,15 @@ static char bpf_log_buf[4096]; static bool verbose; +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + enum sockopt_test_error { OK = 0, DENY_LOAD, DENY_ATTACH, + EOPNOTSUPP_GETSOCKOPT, EPERM_GETSOCKOPT, EFAULT_GETSOCKOPT, EPERM_SETSOCKOPT, @@ -273,10 +278,31 @@ static struct sockopt_test { .error = EFAULT_GETSOCKOPT, }, { - .descr = "getsockopt: deny arbitrary ctx->retval", + .descr = "getsockopt: ignore >PAGE_SIZE optlen", .insns = { - /* ctx->retval = 123 */ - BPF_MOV64_IMM(BPF_REG_0, 123), + /* write 0xFF to the first optval byte */ + + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r2 = ctx->optval */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + /* r6 = ctx->optval + 1 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + + /* r7 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + /* ctx->optval[0] = 0xF0 */ + BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xFF), + /* } */ + + /* retval changes are ignored */ + /* ctx->retval = 5 */ + BPF_MOV64_IMM(BPF_REG_0, 5), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct bpf_sockopt, retval)), @@ -287,9 +313,11 @@ static struct sockopt_test { .attach_type = BPF_CGROUP_GETSOCKOPT, .expected_attach_type = BPF_CGROUP_GETSOCKOPT, - .get_optlen = 64, - - .error = EFAULT_GETSOCKOPT, + .get_level = 1234, + .get_optname = 5678, + .get_optval = {}, /* the changes are ignored */ + .get_optlen = PAGE_SIZE + 1, + .error = EOPNOTSUPP_GETSOCKOPT, }, { .descr = "getsockopt: support smaller ctx->optlen", @@ -649,6 +677,45 @@ static struct sockopt_test { .error = EFAULT_SETSOCKOPT, }, { + .descr = "setsockopt: ignore >PAGE_SIZE optlen", + .insns = { + /* write 0xFF to the first optval byte */ + + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r2 = ctx->optval */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + /* r6 = ctx->optval + 1 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + + /* r7 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + /* ctx->optval[0] = 0xF0 */ + BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0), + /* } */ + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_level = SOL_IP, + .set_optname = IP_TOS, + .set_optval = {}, + .set_optlen = PAGE_SIZE + 1, + + .get_level = SOL_IP, + .get_optname = IP_TOS, + .get_optval = {}, /* the changes are ignored */ + .get_optlen = 4, + }, + { .descr = "setsockopt: allow changing ctx->optlen within bounds", .insns = { /* r6 = ctx->optval */ @@ -906,6 +973,13 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) } if (test->set_optlen) { + if (test->set_optlen >= PAGE_SIZE) { + int num_pages = test->set_optlen / PAGE_SIZE; + int remainder = test->set_optlen % PAGE_SIZE; + + test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder; + } + err = setsockopt(sock_fd, test->set_level, test->set_optname, test->set_optval, test->set_optlen); if (err) { @@ -921,7 +995,15 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) } if (test->get_optlen) { + if (test->get_optlen >= PAGE_SIZE) { + int num_pages = test->get_optlen / PAGE_SIZE; + int remainder = test->get_optlen % PAGE_SIZE; + + test->get_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder; + } + optval = malloc(test->get_optlen); + memset(optval, 0, test->get_optlen); socklen_t optlen = test->get_optlen; socklen_t expected_get_optlen = test->get_optlen_ret ?: test->get_optlen; @@ -929,6 +1011,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) err = getsockopt(sock_fd, test->get_level, test->get_optname, optval, &optlen); if (err) { + if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT) + goto free_optval; if (errno == EPERM && test->error == EPERM_GETSOCKOPT) goto free_optval; if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT) @@ -976,7 +1060,9 @@ void test_sockopt(void) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { - test__start_subtest(tests[i].descr); + if (!test__start_subtest(tests[i].descr)) + continue; + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 60c17a8e2789..917f486db826 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -2,6 +2,8 @@ #include <test_progs.h> #include "cgroup_helpers.h" +#include "sockopt_inherit.skel.h" + #define SOL_CUSTOM 0xdeadbeef #define CUSTOM_INHERIT1 0 #define CUSTOM_INHERIT2 1 @@ -132,58 +134,30 @@ static int start_server(void) return fd; } -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, - const char *prog_name) -{ - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; - - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) { - log_err("Failed to deduct types for %s BPF program", prog_name); - return -1; - } - - prog = bpf_object__find_program_by_name(obj, prog_name); - if (!prog) { - log_err("Failed to find %s BPF program", prog_name); - return -1; - } - - err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, - attach_type, 0); - if (err) { - log_err("Failed to attach %s BPF program", prog_name); - return -1; - } - - return 0; -} - static void run_test(int cgroup_fd) { + struct bpf_link *link_getsockopt = NULL; + struct bpf_link *link_setsockopt = NULL; int server_fd = -1, client_fd; - struct bpf_object *obj; + struct sockopt_inherit *obj; void *server_err; pthread_t tid; int err; - obj = bpf_object__open_file("sockopt_inherit.bpf.o", NULL); - if (!ASSERT_OK_PTR(obj, "obj_open")) + obj = sockopt_inherit__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) return; - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "obj_load")) - goto close_bpf_object; + obj->bss->page_size = sysconf(_SC_PAGESIZE); - err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt"); - if (!ASSERT_OK(err, "prog_attach _getsockopt")) + link_getsockopt = bpf_program__attach_cgroup(obj->progs._getsockopt, + cgroup_fd); + if (!ASSERT_OK_PTR(link_getsockopt, "cg-attach-getsockopt")) goto close_bpf_object; - err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt"); - if (!ASSERT_OK(err, "prog_attach _setsockopt")) + link_setsockopt = bpf_program__attach_cgroup(obj->progs._setsockopt, + cgroup_fd); + if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt")) goto close_bpf_object; server_fd = start_server(); @@ -217,7 +191,10 @@ static void run_test(int cgroup_fd) close_server_fd: close(server_fd); close_bpf_object: - bpf_object__close(obj); + bpf_link__destroy(link_getsockopt); + bpf_link__destroy(link_setsockopt); + + sockopt_inherit__destroy(obj); } void test_sockopt_inherit(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 7f5659349011..759bbb6f8c5f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -2,61 +2,13 @@ #include <test_progs.h> #include "cgroup_helpers.h" -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) -{ - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; - - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) { - log_err("Failed to deduct types for %s BPF program", title); - return -1; - } - - prog = bpf_object__find_program_by_name(obj, name); - if (!prog) { - log_err("Failed to find %s BPF program", name); - return -1; - } - - err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, - attach_type, BPF_F_ALLOW_MULTI); - if (err) { - log_err("Failed to attach %s BPF program", name); - return -1; - } - - return 0; -} +#include "sockopt_multi.skel.h" -static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) -{ - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; - - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) - return -1; - - prog = bpf_object__find_program_by_name(obj, name); - if (!prog) - return -1; - - err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd, - attach_type); - if (err) - return -1; - - return 0; -} - -static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, +static int run_getsockopt_test(struct sockopt_multi *obj, int cg_parent, int cg_child, int sock_fd) { + struct bpf_link *link_parent = NULL; + struct bpf_link *link_child = NULL; socklen_t optlen; __u8 buf; int err; @@ -89,8 +41,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - child: 0x80 -> 0x90 */ - err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - if (err) + link_child = bpf_program__attach_cgroup(obj->progs._getsockopt_child, + cg_child); + if (!ASSERT_OK_PTR(link_child, "cg-attach-getsockopt_child")) goto detach; buf = 0x00; @@ -113,8 +66,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: 0x90 -> 0xA0 */ - err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); - if (err) + link_parent = bpf_program__attach_cgroup(obj->progs._getsockopt_parent, + cg_parent); + if (!ASSERT_OK_PTR(link_parent, "cg-attach-getsockopt_parent")) goto detach; buf = 0x00; @@ -157,11 +111,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: unexpected 0x40, EPERM */ - err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - if (err) { - log_err("Failed to detach child program"); - goto detach; - } + bpf_link__destroy(link_child); + link_child = NULL; buf = 0x00; optlen = 1; @@ -198,15 +149,17 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); + bpf_link__destroy(link_child); + bpf_link__destroy(link_parent); return err; } -static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, +static int run_setsockopt_test(struct sockopt_multi *obj, int cg_parent, int cg_child, int sock_fd) { + struct bpf_link *link_parent = NULL; + struct bpf_link *link_child = NULL; socklen_t optlen; __u8 buf; int err; @@ -236,8 +189,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach child program and make sure it adds 0x10. */ - err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); - if (err) + link_child = bpf_program__attach_cgroup(obj->progs._setsockopt, + cg_child); + if (!ASSERT_OK_PTR(link_child, "cg-attach-setsockopt_child")) goto detach; buf = 0x80; @@ -263,8 +217,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach parent program and make sure it adds another 0x10. */ - err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); - if (err) + link_parent = bpf_program__attach_cgroup(obj->progs._setsockopt, + cg_parent); + if (!ASSERT_OK_PTR(link_parent, "cg-attach-setsockopt_parent")) goto detach; buf = 0x80; @@ -289,8 +244,8 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); - prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); + bpf_link__destroy(link_child); + bpf_link__destroy(link_parent); return err; } @@ -298,9 +253,8 @@ detach: void test_sockopt_multi(void) { int cg_parent = -1, cg_child = -1; - struct bpf_object *obj = NULL; + struct sockopt_multi *obj = NULL; int sock_fd = -1; - int err = -1; cg_parent = test__join_cgroup("/parent"); if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) @@ -310,13 +264,11 @@ void test_sockopt_multi(void) if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) goto out; - obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL); - if (!ASSERT_OK_PTR(obj, "obj_load")) + obj = sockopt_multi__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) goto out; - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "obj_load")) - goto out; + obj->bss->page_size = sysconf(_SC_PAGESIZE); sock_fd = socket(AF_INET, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) @@ -327,7 +279,7 @@ void test_sockopt_multi(void) out: close(sock_fd); - bpf_object__close(obj); + sockopt_multi__destroy(obj); close(cg_child); close(cg_parent); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c index 6b53b3cb8dad..6b2d300e9fd4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c @@ -42,6 +42,8 @@ void test_sockopt_qos_to_cc(void) if (!ASSERT_OK_PTR(skel, "skel")) goto done; + skel->bss->page_size = sysconf(_SC_PAGESIZE); + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "v6 socket open")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 4512dd808c33..05d0e07da394 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -209,7 +209,7 @@ static int getsetsockopt(void) err, errno); goto err; } - ASSERT_EQ(optlen, 4, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); + ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value"); free(big_buf); close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c new file mode 100644 index 000000000000..4224727fb364 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Bytedance */ + +#include <sys/syscall.h> +#include <test_progs.h> +#include <cgroup_helpers.h> +#include "test_task_under_cgroup.skel.h" + +#define FOO "/foo" + +void test_task_under_cgroup(void) +{ + struct test_task_under_cgroup *skel; + int ret, foo; + pid_t pid; + + foo = test__join_cgroup(FOO); + if (!ASSERT_OK(foo < 0, "cgroup_join_foo")) + return; + + skel = test_task_under_cgroup__open(); + if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open")) + goto cleanup; + + skel->rodata->local_pid = getpid(); + skel->bss->remote_pid = getpid(); + skel->rodata->cgid = get_cgroup_id(FOO); + + ret = test_task_under_cgroup__load(skel); + if (!ASSERT_OK(ret, "test_task_under_cgroup__load")) + goto cleanup; + + ret = test_task_under_cgroup__attach(skel); + if (!ASSERT_OK(ret, "test_task_under_cgroup__attach")) + goto cleanup; + + pid = fork(); + if (pid == 0) + exit(0); + + ret = (pid == -1); + if (ASSERT_OK(ret, "fork process")) + wait(NULL); + + test_task_under_cgroup__detach(skel); + + ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid, + "test task_under_cgroup"); + +cleanup: + test_task_under_cgroup__destroy(skel); + close(foo); +} diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 2497716ee379..531621adef42 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -55,6 +55,7 @@ #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" #include "verifier_stack_ptr.skel.h" +#include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" #include "verifier_uninit.skel.h" #include "verifier_unpriv.skel.h" @@ -154,6 +155,7 @@ void test_verifier_sock(void) { RUN(verifier_sock); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } +void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } void test_verifier_uninit(void) { RUN(verifier_uninit); } void test_verifier_unpriv(void) { RUN(verifier_unpriv); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index d19f79048ff6..c3b45745cbcc 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -18,6 +18,7 @@ #include <linux/if_bonding.h> #include <linux/limits.h> #include <linux/udp.h> +#include <uapi/linux/netdev.h> #include "xdp_dummy.skel.h" #include "xdp_redirect_multi_kern.skel.h" @@ -492,6 +493,123 @@ out: system("ip link del bond_nest2"); } +static void test_xdp_bonding_features(struct skeletons *skeletons) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); + int bond_idx, veth1_idx, err; + struct bpf_link *link = NULL; + + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) + goto out; + + bond_idx = if_nametoindex("bond"); + if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond")) + goto out; + + /* query default xdp-feature for bond device */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"), + "add veth{0,1} pair")) + goto out; + + if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"), + "add veth{2,3} pair")) + goto out; + + if (!ASSERT_OK(system("ip link set veth0 master bond"), + "add veth0 to master bond")) + goto out; + + /* xdp-feature for bond device should be obtained from the single slave + * device (veth0) + */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "bond query_opts.feature_flags")) + goto out; + + veth1_idx = if_nametoindex("veth1"); + if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1")) + goto out; + + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, + veth1_idx); + if (!ASSERT_OK_PTR(link, "attach program to veth1")) + goto out; + + /* xdp-feature for veth0 are changed */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth2 master bond"), + "add veth2 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + /* xdp-feature for bond device should be set to the most restrict + * value obtained from attached slave devices (veth0 and veth2) + */ + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth2 nomaster"), + "del veth2 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth0 nomaster"), + "del veth0 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + "bond query_opts.feature_flags"); +out: + bpf_link__destroy(link); + system("ip link del veth0"); + system("ip link del veth2"); + system("ip link del bond"); +} + static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { @@ -546,6 +664,9 @@ void serial_test_xdp_bonding(void) if (test__start_subtest("xdp_bonding_nested")) test_xdp_bonding_nested(&skeletons); + if (test__start_subtest("xdp_bonding_features")) + test_xdp_bonding_features(&skeletons); + for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { struct bond_test_case *test_case = &bond_test_cases[i]; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index d3c1217ba79a..38a57a2e70db 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -86,6 +86,10 @@ #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#ifndef __used +#define __used __attribute__((used)) +#endif + #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__x64_" diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 50f95ec61165..76d661b20e87 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -2,6 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr *ptr; @@ -14,9 +15,6 @@ struct { __uint(max_entries, 16); } array_map SEC(".maps"); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - static __noinline int cb1(void *map, void *key, void *value, void *ctx) { void *p = *(void **)ctx; diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c index b2a409e6382a..932b8ecd4ae3 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c @@ -12,6 +12,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; __u32 ctx_retval_value = 0; +__u32 page_size = 0; SEC("cgroup/getsockopt") int get_retval(struct bpf_sockopt *ctx) @@ -20,6 +21,10 @@ int get_retval(struct bpf_sockopt *ctx) ctx_retval_value = ctx->retval; __sync_fetch_and_add(&invocations, 1); + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } @@ -31,6 +36,10 @@ int set_eisconn(struct bpf_sockopt *ctx) if (bpf_set_retval(-EISCONN)) assertion_error = 1; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } @@ -41,5 +50,9 @@ int clear_retval(struct bpf_sockopt *ctx) ctx->retval = 0; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c index d6e5903e06ba..b7fa8804e19d 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -11,6 +11,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; +__u32 page_size = 0; SEC("cgroup/setsockopt") int get_retval(struct bpf_sockopt *ctx) @@ -18,6 +19,10 @@ int get_retval(struct bpf_sockopt *ctx) retval_value = bpf_get_retval(); __sync_fetch_and_add(&invocations, 1); + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } @@ -29,6 +34,10 @@ int set_eunatch(struct bpf_sockopt *ctx) if (bpf_set_retval(-EUNATCH)) assertion_error = 1; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 0; } @@ -40,6 +49,10 @@ int set_eisconn(struct bpf_sockopt *ctx) if (bpf_set_retval(-EISCONN)) assertion_error = 1; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 0; } @@ -48,5 +61,9 @@ int legacy_eperm(struct bpf_sockopt *ctx) { __sync_fetch_and_add(&invocations, 1); + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 759eb5c245cd..7ce7e827d5f0 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -3,6 +3,7 @@ #include <errno.h> #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <linux/if_ether.h> @@ -1378,3 +1379,310 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) return 0; } + +/* bpf_dynptr_adjust can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_adjust_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_adjust(&ptr, 1, 2); + + return 0; +} + +/* bpf_dynptr_is_null can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_is_null_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_is_null(&ptr); + + return 0; +} + +/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_is_rdonly_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_is_rdonly(&ptr); + + return 0; +} + +/* bpf_dynptr_size can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_size_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_size(&ptr); + + return 0; +} + +/* Only initialized dynptrs can be cloned */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int clone_invalid1(void *ctx) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + + /* this should fail */ + bpf_dynptr_clone(&ptr1, &ptr2); + + return 0; +} + +/* Can't overwrite an existing dynptr when cloning */ +SEC("?xdp") +__failure __msg("cannot overwrite referenced dynptr") +int clone_invalid2(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr clone; + + bpf_dynptr_from_xdp(xdp, 0, &ptr1); + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone); + + /* this should fail */ + bpf_dynptr_clone(&ptr1, &clone); + + bpf_ringbuf_submit_dynptr(&clone, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate its clones */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") +int clone_invalidate1(void *ctx) +{ + struct bpf_dynptr clone; + struct bpf_dynptr ptr; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate its parent */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") +int clone_invalidate2(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + + bpf_ringbuf_submit_dynptr(&clone, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate its siblings */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") +int clone_invalidate3(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone1; + struct bpf_dynptr clone2; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone1); + + bpf_dynptr_clone(&ptr, &clone2); + + bpf_ringbuf_submit_dynptr(&clone2, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate any data slices + * of its clones + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int clone_invalidate4(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone; + int *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + data = bpf_dynptr_data(&clone, 0, sizeof(val)); + if (!data) + return 0; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + *data = 123; + + return 0; +} + +/* Invalidating a dynptr should invalidate any data slices + * of its parent + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int clone_invalidate5(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone; + int *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + data = bpf_dynptr_data(&ptr, 0, sizeof(val)); + if (!data) + return 0; + + bpf_dynptr_clone(&ptr, &clone); + + bpf_ringbuf_submit_dynptr(&clone, 0); + + /* this should fail */ + *data = 123; + + return 0; +} + +/* Invalidating a dynptr should invalidate any data slices + * of its sibling + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int clone_invalidate6(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone1; + struct bpf_dynptr clone2; + int *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone1); + + bpf_dynptr_clone(&ptr, &clone2); + + data = bpf_dynptr_data(&clone1, 0, sizeof(val)); + if (!data) + return 0; + + bpf_ringbuf_submit_dynptr(&clone2, 0); + + /* this should fail */ + *data = 123; + + return 0; +} + +/* A skb clone's data slices should be invalid anytime packet data changes */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_data(struct __sk_buff *skb) +{ + char buffer[sizeof(__u32)] = {}; + struct bpf_dynptr clone; + struct bpf_dynptr ptr; + __u32 *data; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer)); + if (!data) + return XDP_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *data = 123; + + return 0; +} + +/* A xdp clone's data slices should be invalid anytime packet data changes */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int clone_xdp_packet_data(struct xdp_md *xdp) +{ + char buffer[sizeof(__u32)] = {}; + struct bpf_dynptr clone; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + __u32 *data; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer)); + if (!data) + return XDP_DROP; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + *data = 123; + + return 0; +} + +/* Buffers that are provided must be sufficiently long */ +SEC("?cgroup_skb/egress") +__failure __msg("memory, len pair leads to invalid memory access") +int test_dynptr_skb_small_buff(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + char buffer[8] = {}; + __u64 *data; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This may return NULL. SKB may require a buffer */ + data = bpf_dynptr_slice(&ptr, 0, buffer, 9); + + return !!data; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index b2fa6c47ecc0..5985920d162e 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Facebook */ #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -207,3 +208,339 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_adjust(void *ctx) +{ + struct bpf_dynptr ptr; + __u32 bytes = 64; + __u32 off = 10; + __u32 trim = 15; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr); + if (err) { + err = 1; + goto done; + } + + if (bpf_dynptr_size(&ptr) != bytes) { + err = 2; + goto done; + } + + /* Advance the dynptr by off */ + err = bpf_dynptr_adjust(&ptr, off, bpf_dynptr_size(&ptr)); + if (err) { + err = 3; + goto done; + } + + if (bpf_dynptr_size(&ptr) != bytes - off) { + err = 4; + goto done; + } + + /* Trim the dynptr */ + err = bpf_dynptr_adjust(&ptr, off, 15); + if (err) { + err = 5; + goto done; + } + + /* Check that the size was adjusted correctly */ + if (bpf_dynptr_size(&ptr) != trim - off) { + err = 6; + goto done; + } + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_adjust_err(void *ctx) +{ + char write_data[45] = "hello there, world!!"; + struct bpf_dynptr ptr; + __u32 size = 64; + __u32 off = 20; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) { + err = 1; + goto done; + } + + /* Check that start can't be greater than end */ + if (bpf_dynptr_adjust(&ptr, 5, 1) != -EINVAL) { + err = 2; + goto done; + } + + /* Check that start can't be greater than size */ + if (bpf_dynptr_adjust(&ptr, size + 1, size + 1) != -ERANGE) { + err = 3; + goto done; + } + + /* Check that end can't be greater than size */ + if (bpf_dynptr_adjust(&ptr, 0, size + 1) != -ERANGE) { + err = 4; + goto done; + } + + if (bpf_dynptr_adjust(&ptr, off, size)) { + err = 5; + goto done; + } + + /* Check that you can't write more bytes than available into the dynptr + * after you've adjusted it + */ + if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) { + err = 6; + goto done; + } + + /* Check that even after adjusting, submitting/discarding + * a ringbuf dynptr works + */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_zero_size_dynptr(void *ctx) +{ + char write_data = 'x', read_data; + struct bpf_dynptr ptr; + __u32 size = 64; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) { + err = 1; + goto done; + } + + /* After this, the dynptr has a size of 0 */ + if (bpf_dynptr_adjust(&ptr, size, size)) { + err = 2; + goto done; + } + + /* Test that reading + writing non-zero bytes is not ok */ + if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) { + err = 3; + goto done; + } + + if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) { + err = 4; + goto done; + } + + /* Test that reading + writing 0 bytes from a 0-size dynptr is ok */ + if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) { + err = 5; + goto done; + } + + if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) { + err = 6; + goto done; + } + + err = 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_dynptr_is_null(void *ctx) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + __u64 size = 4; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + /* Pass in invalid flags, get back an invalid dynptr */ + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) { + err = 1; + goto exit_early; + } + + /* Test that the invalid dynptr is null */ + if (!bpf_dynptr_is_null(&ptr1)) { + err = 2; + goto exit_early; + } + + /* Get a valid dynptr */ + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) { + err = 3; + goto exit; + } + + /* Test that the valid dynptr is not null */ + if (bpf_dynptr_is_null(&ptr2)) { + err = 4; + goto exit; + } + +exit: + bpf_ringbuf_discard_dynptr(&ptr2, 0); +exit_early: + bpf_ringbuf_discard_dynptr(&ptr1, 0); + return 0; +} + +SEC("cgroup_skb/egress") +int test_dynptr_is_rdonly(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + struct bpf_dynptr ptr3; + + /* Pass in invalid flags, get back an invalid dynptr */ + if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) { + err = 1; + return 0; + } + + /* Test that an invalid dynptr is_rdonly returns false */ + if (bpf_dynptr_is_rdonly(&ptr1)) { + err = 2; + return 0; + } + + /* Get a read-only dynptr */ + if (bpf_dynptr_from_skb(skb, 0, &ptr2)) { + err = 3; + return 0; + } + + /* Test that the dynptr is read-only */ + if (!bpf_dynptr_is_rdonly(&ptr2)) { + err = 4; + return 0; + } + + /* Get a read-writeable dynptr */ + if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) { + err = 5; + goto done; + } + + /* Test that the dynptr is read-only */ + if (bpf_dynptr_is_rdonly(&ptr3)) { + err = 6; + goto done; + } + +done: + bpf_ringbuf_discard_dynptr(&ptr3, 0); + return 0; +} + +SEC("cgroup_skb/egress") +int test_dynptr_clone(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + __u32 off = 2, size; + + /* Get a dynptr */ + if (bpf_dynptr_from_skb(skb, 0, &ptr1)) { + err = 1; + return 0; + } + + if (bpf_dynptr_adjust(&ptr1, off, bpf_dynptr_size(&ptr1))) { + err = 2; + return 0; + } + + /* Clone the dynptr */ + if (bpf_dynptr_clone(&ptr1, &ptr2)) { + err = 3; + return 0; + } + + size = bpf_dynptr_size(&ptr1); + + /* Check that the clone has the same size and rd-only */ + if (bpf_dynptr_size(&ptr2) != size) { + err = 4; + return 0; + } + + if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) { + err = 5; + return 0; + } + + /* Advance and trim the original dynptr */ + bpf_dynptr_adjust(&ptr1, 5, 5); + + /* Check that only original dynptr was affected, and the clone wasn't */ + if (bpf_dynptr_size(&ptr2) != size) { + err = 6; + return 0; + } + + return 0; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_no_buff(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + __u64 *data; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This may return NULL. SKB may require a buffer */ + data = bpf_dynptr_slice(&ptr, 0, NULL, 1); + + return !!data; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_strcmp(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + char *data; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This may return NULL. SKB may require a buffer */ + data = bpf_dynptr_slice(&ptr, 0, NULL, 10); + if (data) { + bpf_strncmp(data, 10, "foo"); + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/inner_array_lookup.c b/tools/testing/selftests/bpf/progs/inner_array_lookup.c new file mode 100644 index 000000000000..c2c8f2fa451d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/inner_array_lookup.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, int); + __type(value, int); +} inner_map1 SEC(".maps"); + +struct outer_map { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 3); + __type(key, int); + __array(values, struct inner_map); +} outer_map1 SEC(".maps") = { + .values = { + [2] = &inner_map1, + }, +}; + +SEC("raw_tp/sys_enter") +int handle__sys_enter(void *ctx) +{ + int outer_key = 2, inner_key = 3; + int *val; + void *map; + + map = bpf_map_lookup_elem(&outer_map1, &outer_key); + if (!map) + return 1; + + val = bpf_map_lookup_elem(map, &inner_key); + if (!val) + return 1; + + if (*val == 1) + *val = 2; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index be16143ae292..6b9b3c56f009 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -651,29 +651,25 @@ int iter_stack_array_loop(const void *ctx) return sum; } -#define ARR_SZ 16 - -static __noinline void fill(struct bpf_iter_num *it, int *arr, int mul) +static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul) { - int *t; - __u64 i; + int *t, i; while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= ARR_SZ) + if (i >= n) break; arr[i] = i * mul; } } -static __noinline int sum(struct bpf_iter_num *it, int *arr) +static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) { - int *t, sum = 0;; - __u64 i; + int *t, i, sum = 0;; while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= ARR_SZ) + if (i >= n) break; sum += arr[i]; } @@ -685,7 +681,7 @@ SEC("raw_tp") __success int iter_pass_iter_ptr_to_subprog(const void *ctx) { - int arr1[ARR_SZ], arr2[ARR_SZ]; + int arr1[16], arr2[32]; struct bpf_iter_num it; int n, sum1, sum2; @@ -694,25 +690,25 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx) /* fill arr1 */ n = ARRAY_SIZE(arr1); bpf_iter_num_new(&it, 0, n); - fill(&it, arr1, 2); + fill(&it, arr1, n, 2); bpf_iter_num_destroy(&it); /* fill arr2 */ n = ARRAY_SIZE(arr2); bpf_iter_num_new(&it, 0, n); - fill(&it, arr2, 10); + fill(&it, arr2, n, 10); bpf_iter_num_destroy(&it); /* sum arr1 */ n = ARRAY_SIZE(arr1); bpf_iter_num_new(&it, 0, n); - sum1 = sum(&it, arr1); + sum1 = sum(&it, arr1, n); bpf_iter_num_destroy(&it); /* sum arr2 */ n = ARRAY_SIZE(arr2); bpf_iter_num_new(&it, 0, n); - sum2 = sum(&it, arr2); + sum2 = sum(&it, arr2, n); bpf_iter_num_destroy(&it); bpf_printk("sum1=%d, sum2=%d", sum1, sum2); diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index 13f00ca2ed0a..f9789e668297 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -3,13 +3,11 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - SEC("tc") int test_jit_probe_mem(struct __sk_buff *ctx) { diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c index 767472bc5a97..7632d9ecb253 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern void bpf_kfunc_call_test_destructive(void) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_destructive_test(void) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c index b98313d391c6..4b0b7b79cdfb 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c @@ -2,14 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern void bpf_kfunc_call_int_mem_release(int *p) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct syscall_test_args { __u8 data[16]; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c index 4e8fed75a4e0..d532af07decf 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern void bpf_testmod_test_mod_kfunc(int i) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_fail(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 7daa8f5720b9..cf68d1e48a0f 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -2,22 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; -extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; - -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; -extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; -extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; -extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; -extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_test4(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index c1fdecabeabf..2380c75e74ce 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -1,13 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" extern const int bpf_prog_active __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; -extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; int active_res = -1; int sk_state_res = -1; @@ -28,7 +23,7 @@ int __noinline f1(struct __sk_buff *skb) if (active) active_res = *active; - sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state; + sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 0ef286da092b..06838083079c 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -5,7 +5,8 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> -#include "bpf_experimental.h" +#include "../bpf_experimental.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct node_data { long key; @@ -32,8 +33,6 @@ struct map_value { */ struct node_data *just_here_because_btf_bug; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index d7150041e5d1..da30f0d59364 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -2,6 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; @@ -114,10 +115,6 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; - #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) static void test_kptr_unref(struct map_value *v) diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index da8c724f839b..450bb373b179 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -4,6 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_misc.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { char buf[8]; @@ -19,9 +20,6 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - SEC("?tc") __failure __msg("kptr access size must be BPF_DW") int size_not_bpf_dw(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c new file mode 100644 index 000000000000..9e0bf7a54cec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include "bpf_tracing_net.h" + +__be16 serv_port = 0; + +int bpf_sock_destroy(struct sock_common *sk) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} tcp_conn_sockets SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} udp_conn_sockets SEC(".maps"); + +SEC("cgroup/connect6") +int sock_connect(struct bpf_sock_addr *ctx) +{ + __u64 sock_cookie = 0; + int key = 0; + __u32 keyc = 0; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sock_cookie = bpf_get_socket_cookie(ctx); + if (ctx->protocol == IPPROTO_TCP) + bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0); + else if (ctx->protocol == IPPROTO_UDP) + bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0); + else + return 1; + + return 1; +} + +SEC("iter/tcp") +int iter_tcp6_client(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + __u64 sock_cookie = 0; + __u64 *val; + int key = 0; + + if (!sk_common) + return 0; + + if (sk_common->skc_family != AF_INET6) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk_common); + val = bpf_map_lookup_elem(&tcp_conn_sockets, &key); + if (!val) + return 0; + /* Destroy connected client sockets. */ + if (sock_cookie == *val) + bpf_sock_destroy(sk_common); + + return 0; +} + +SEC("iter/tcp") +int iter_tcp6_server(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + const struct inet_connection_sock *icsk; + const struct inet_sock *inet; + struct tcp6_sock *tcp_sk; + __be16 srcp; + + if (!sk_common) + return 0; + + if (sk_common->skc_family != AF_INET6) + return 0; + + tcp_sk = bpf_skc_to_tcp6_sock(sk_common); + if (!tcp_sk) + return 0; + + icsk = &tcp_sk->tcp.inet_conn; + inet = &icsk->icsk_inet; + srcp = inet->inet_sport; + + /* Destroy server sockets. */ + if (srcp == serv_port) + bpf_sock_destroy(sk_common); + + return 0; +} + + +SEC("iter/udp") +int iter_udp6_client(struct bpf_iter__udp *ctx) +{ + struct udp_sock *udp_sk = ctx->udp_sk; + struct sock *sk = (struct sock *) udp_sk; + __u64 sock_cookie = 0, *val; + int key = 0; + + if (!sk) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk); + val = bpf_map_lookup_elem(&udp_conn_sockets, &key); + if (!val) + return 0; + /* Destroy connected client sockets. */ + if (sock_cookie == *val) + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + +SEC("iter/udp") +int iter_udp6_server(struct bpf_iter__udp *ctx) +{ + struct udp_sock *udp_sk = ctx->udp_sk; + struct sock *sk = (struct sock *) udp_sk; + struct inet_sock *inet; + __be16 srcp; + + if (!sk) + return 0; + + inet = &udp_sk->inet; + srcp = inet->inet_sport; + if (srcp == serv_port) + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c new file mode 100644 index 000000000000..dd6850b58e25 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int bpf_sock_destroy(struct sock_common *sk) __ksym; + +SEC("tp_btf/tcp_destroy_sock") +__failure __msg("calling kernel function bpf_sock_destroy is not allowed") +int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk) +{ + /* should not load */ + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index 9fb241b97291..c8f59caa4639 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -9,6 +9,8 @@ char _license[] SEC("license") = "GPL"; #define CUSTOM_INHERIT2 1 #define CUSTOM_LISTENER 2 +__u32 page_size = 0; + struct sockopt_inherit { __u8 val; }; @@ -55,7 +57,7 @@ int _getsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_CUSTOM) - return 1; /* only interested in SOL_CUSTOM */ + goto out; /* only interested in SOL_CUSTOM */ if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -70,6 +72,12 @@ int _getsockopt(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/setsockopt") @@ -80,7 +88,7 @@ int _setsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_CUSTOM) - return 1; /* only interested in SOL_CUSTOM */ + goto out; /* only interested in SOL_CUSTOM */ if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -93,4 +101,10 @@ int _setsockopt(struct bpf_sockopt *ctx) ctx->optlen = -1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 177a59069dae..96f29fce050b 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -5,6 +5,8 @@ char _license[] SEC("license") = "GPL"; +__u32 page_size = 0; + SEC("cgroup/getsockopt") int _getsockopt_child(struct bpf_sockopt *ctx) { @@ -12,7 +14,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_IP || ctx->optname != IP_TOS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -26,6 +28,12 @@ int _getsockopt_child(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/getsockopt") @@ -35,7 +43,7 @@ int _getsockopt_parent(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_IP || ctx->optname != IP_TOS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -49,6 +57,12 @@ int _getsockopt_parent(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/setsockopt") @@ -58,7 +72,7 @@ int _setsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_IP || ctx->optname != IP_TOS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -67,4 +81,10 @@ int _setsockopt(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c index 1bce83b6e3a7..dbe235ede7f3 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c @@ -9,6 +9,8 @@ char _license[] SEC("license") = "GPL"; +__u32 page_size = 0; + SEC("cgroup/setsockopt") int sockopt_qos_to_cc(struct bpf_sockopt *ctx) { @@ -19,7 +21,7 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx) char cc_cubic[TCP_CA_NAME_MAX] = "cubic"; if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -36,4 +38,10 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx) return 0; } return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index fe1df4cd206e..cb990a7d3d45 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -37,7 +37,7 @@ int _getsockopt(struct bpf_sockopt *ctx) /* Bypass AF_NETLINK. */ sk = ctx->sk; if (sk && sk->family == AF_NETLINK) - return 1; + goto out; /* Make sure bpf_get_netns_cookie is callable. */ @@ -52,8 +52,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * let next BPF program in the cgroup chain or kernel * handle it. */ - ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ - return 1; + goto out; } if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { @@ -61,7 +60,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * let next BPF program in the cgroup chain or kernel * handle it. */ - return 1; + goto out; } if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { @@ -69,7 +68,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * let next BPF program in the cgroup chain or kernel * handle it. */ - return 1; + goto out; } if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { @@ -85,7 +84,7 @@ int _getsockopt(struct bpf_sockopt *ctx) if (((struct tcp_zerocopy_receive *)optval)->address != 0) return 0; /* unexpected data */ - return 1; + goto out; } if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { @@ -129,6 +128,12 @@ int _getsockopt(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/setsockopt") @@ -142,7 +147,7 @@ int _setsockopt(struct bpf_sockopt *ctx) /* Bypass AF_NETLINK. */ sk = ctx->sk; if (sk && sk->family == AF_NETLINK) - return 1; + goto out; /* Make sure bpf_get_netns_cookie is callable. */ @@ -224,4 +229,10 @@ int _setsockopt(struct bpf_sockopt *ctx) */ return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index b85fc8c423ba..17a9f59bf5f3 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -10,6 +10,8 @@ static __attribute__ ((noinline)) int f0(int var, struct __sk_buff *skb) { + asm volatile (""); + return skb->len; } diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c new file mode 100644 index 000000000000..2588f2384246 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +/* rodata section */ +const volatile pid_t pid; +const volatile size_t bss_array_len; +const volatile size_t data_array_len; + +/* bss section */ +int sum = 0; +int array[1]; + +/* custom data secton */ +int my_array[1] SEC(".data.custom"); + +/* custom data section which should NOT be resizable, + * since it contains a single var which is not an array + */ +int my_int SEC(".data.non_array"); + +/* custom data section which should NOT be resizable, + * since its last var is not an array + */ +int my_array_first[1] SEC(".data.array_not_last"); +int my_int_last SEC(".data.array_not_last"); + +SEC("tp/syscalls/sys_enter_getpid") +int bss_array_sum(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + sum = 0; + + for (size_t i = 0; i < bss_array_len; ++i) + sum += array[i]; + + return 0; +} + +SEC("tp/syscalls/sys_enter_getuid") +int data_array_sum(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + sum = 0; + + for (size_t i = 0; i < data_array_len; ++i) + sum += my_array[i]; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index bbad3c2d9aa5..f75e531bf36f 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -265,7 +265,10 @@ static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) { - __u16 *half = (__u16 *)&sk->dst_port; + __u16 *half; + + asm volatile (""); + half = (__u16 *)&sk->dst_port; return half[0] == bpf_htons(0xcafe); } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c new file mode 100644 index 000000000000..29314805ce42 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c @@ -0,0 +1,32 @@ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_tx SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_msg SEC(".maps"); + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + return SK_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index baf9ebc6d903..99d2ea9fb658 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -191,7 +191,7 @@ SEC("sockops") int bpf_sockmap(struct bpf_sock_ops *skops) { __u32 lport, rport; - int op, err, ret; + int op, ret; op = (int) skops->op; @@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops) if (lport == 10000) { ret = 1; #ifdef SOCKMAP - err = bpf_sock_map_update(skops, &sock_map, &ret, + bpf_sock_map_update(skops, &sock_map, &ret, BPF_NOEXIST); #else - err = bpf_sock_hash_update(skops, &sock_map, &ret, + bpf_sock_hash_update(skops, &sock_map, &ret, BPF_NOEXIST); #endif } @@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops) if (bpf_ntohl(rport) == 10001) { ret = 10; #ifdef SOCKMAP - err = bpf_sock_map_update(skops, &sock_map, &ret, + bpf_sock_map_update(skops, &sock_map, &ret, BPF_NOEXIST); #else - err = bpf_sock_hash_update(skops, &sock_map, &ret, + bpf_sock_hash_update(skops, &sock_map, &ret, BPF_NOEXIST); #endif } @@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops) break; } - __sink(err); - return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c new file mode 100644 index 000000000000..1d86a717a290 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c @@ -0,0 +1,32 @@ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_rx SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_tx SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map_msg SEC(".maps"); + +SEC("sk_skb") +int prog_skb_verdict(struct __sk_buff *skb) +{ + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c new file mode 100644 index 000000000000..56cdc0a553f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Bytedance */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; +long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +const volatile int local_pid; +const volatile __u64 cgid; +int remote_pid; + +SEC("tp_btf/task_newtask") +int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags) +{ + struct cgroup *cgrp = NULL; + struct task_struct *acquired; + + if (local_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + acquired = bpf_task_acquire(task); + if (!acquired) + return 0; + + if (local_pid == acquired->tgid) + goto out; + + cgrp = bpf_cgroup_from_id(cgid); + if (!cgrp) + goto out; + + if (bpf_task_under_cgroup(acquired, cgrp)) + remote_pid = acquired->tgid; + +out: + if (cgrp) + bpf_cgroup_release(cgrp); + bpf_task_release(acquired); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 25ee4a22e48d..78c368e71797 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Meta */ #include <stddef.h> #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/if_packet.h> diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c new file mode 100644 index 000000000000..db6b3143338b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +int vals[] SEC(".data.vals") = {1, 2, 3, 4}; + +__naked __noinline __used +static unsigned long identity_subprog() +{ + /* the simplest *static* 64-bit identity function */ + asm volatile ( + "r0 = r1;" + "exit;" + ); +} + +__noinline __used +unsigned long global_identity_subprog(__u64 x) +{ + /* the simplest *global* 64-bit identity function */ + return x; +} + +__naked __noinline __used +static unsigned long callback_subprog() +{ + /* the simplest callback function */ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("7: (0f) r1 += r0") +__msg("mark_precise: frame0: regs=r0 stack= before 6: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+5") +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int subprog_result_precise(void) +{ + asm volatile ( + "r6 = 3;" + /* pass r6 through r1 into subprog to get it back as r0; + * this whole chain will have to be marked as precise later + */ + "r1 = r6;" + "call identity_subprog;" + /* now use subprog's returned value (which is a + * r6 -> r1 -> r0 chain), as index into vals array, forcing + * all of that to be known precisely + */ + "r0 *= 4;" + "r1 = %[vals];" + /* here r0->r1->r6 chain is forced to be precise and has to be + * propagated back to the beginning, including through the + * subprog call + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("9: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 9 first_idx 0") +__msg("mark_precise: frame0: regs=r0 stack= before 8: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 7: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (a5) if r0 < 0x4 goto pc+1") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (85) call pc+7") +__naked int global_subprog_result_precise(void) +{ + asm volatile ( + "r6 = 3;" + /* pass r6 through r1 into subprog to get it back as r0; + * given global_identity_subprog is global, precision won't + * propagate all the way back to r6 + */ + "r1 = r6;" + "call global_identity_subprog;" + /* now use subprog's returned value (which is unknown now, so + * we need to clamp it), as index into vals array, forcing r0 + * to be marked precise (with no effect on r6, though) + */ + "if r0 < %[vals_arr_sz] goto 1f;" + "r0 = %[vals_arr_sz] - 1;" + "1:" + "r0 *= 4;" + "r1 = %[vals];" + /* here r0 is forced to be precise and has to be + * propagated back to the global subprog call, but it + * shouldn't go all the way to mark r6 as precise + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals), + __imm_const(vals_arr_sz, ARRAY_SIZE(vals)) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("14: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") +__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") +__msg("mark_precise: frame0: parent state regs=r0 stack=:") +__msg("mark_precise: frame0: last_idx 18 first_idx 0") +__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__naked int callback_result_precise(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and use result; r0 shouldn't propagate back to + * callback_subprog + */ + "r1 = r6;" /* nr_loops */ + "r2 = %[callback_subprog];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r6 = r0;" + "if r6 > 3 goto 1f;" + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the bpf_loop() call, but not beyond + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "1:" + "exit;" + : + : __imm_ptr(vals), + __imm_ptr(callback_subprog), + __imm(bpf_loop) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("7: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 7 first_idx 0") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 10: (bf) r0 = r1") +__msg("mark_precise: frame1: regs= stack= before 4: (85) call pc+5") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_callee_saved_reg_precise(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call identity_subprog;" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("7: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 7 first_idx 0") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (85) call pc+5") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_callee_saved_reg_precise_global(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call global_identity_subprog;" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("12: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: parent state regs=r6 stack=:") +__msg("mark_precise: frame0: last_idx 16 first_idx 0") +__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +__naked int parent_callee_saved_reg_precise_with_callback(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 1;" /* nr_loops */ + "r2 = %[callback_subprog];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) callback call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals), + __imm_ptr(callback_subprog), + __imm(bpf_loop) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("9: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8:") +__msg("mark_precise: frame0: last_idx 13 first_idx 0") +__msg("mark_precise: frame0: regs= stack=-8 before 13: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 12: (bf) r0 = r1") +__msg("mark_precise: frame1: regs= stack= before 5: (85) call pc+6") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_stack_slot_precise(void) +{ + asm volatile ( + /* spill reg */ + "r6 = 3;" + "*(u64 *)(r10 - 8) = r6;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call identity_subprog;" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r6 = *(u64 *)(r10 - 8);" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("9: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8:") +__msg("mark_precise: frame0: last_idx 5 first_idx 0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_stack_slot_precise_global(void) +{ + asm volatile ( + /* spill reg */ + "r6 = 3;" + "*(u64 *)(r10 - 8) = r6;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call global_identity_subprog;" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r6 = *(u64 *)(r10 - 8);" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("14: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8:") +__msg("mark_precise: frame0: last_idx 18 first_idx 0") +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +__naked int parent_stack_slot_precise_with_callback(void) +{ + asm volatile ( + /* spill reg */ + "r6 = 3;" + "*(u64 *)(r10 - 8) = r6;" + + /* ensure we have callback frame in jump history */ + "r1 = r6;" /* nr_loops */ + "r2 = %[callback_subprog];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r6 = *(u64 *)(r10 - 8);" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals), + __imm_ptr(callback_subprog), + __imm(bpf_loop) + : __clobber_common, "r6" + ); +} + +__noinline __used +static __u64 subprog_with_precise_arg(__u64 x) +{ + return vals[x]; /* x is forced to be precise */ +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("8: (0f) r2 += r1") +__msg("mark_precise: frame1: last_idx 8 first_idx 0") +__msg("mark_precise: frame1: regs=r1 stack= before 6: (18) r2 = ") +__msg("mark_precise: frame1: regs=r1 stack= before 5: (67) r1 <<= 2") +__msg("mark_precise: frame1: regs=r1 stack= before 2: (85) call pc+2") +__msg("mark_precise: frame0: regs=r1 stack= before 1: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 0: (b7) r6 = 3") +__naked int subprog_arg_precise(void) +{ + asm volatile ( + "r6 = 3;" + "r1 = r6;" + /* subprog_with_precise_arg expects its argument to be + * precise, so r1->r6 will be marked precise from inside the + * subprog + */ + "call subprog_with_precise_arg;" + "r0 += r6;" + "exit;" + : + : + : __clobber_common, "r6" + ); +} + +/* r1 is pointer to stack slot; + * r2 is a register to spill into that slot + * subprog also spills r2 into its own stack slot + */ +__naked __noinline __used +static __u64 subprog_spill_reg_precise(void) +{ + asm volatile ( + /* spill to parent stack */ + "*(u64 *)(r1 + 0) = r2;" + /* spill to subprog stack (we use -16 offset to avoid + * accidental confusion with parent's -8 stack slot in + * verifier log output) + */ + "*(u64 *)(r10 - 16) = r2;" + /* use both spills as return result to propagete precision everywhere */ + "r0 = *(u64 *)(r10 - 16);" + "r2 = *(u64 *)(r1 + 0);" + "r0 += r2;" + "exit;" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +/* precision backtracking can't currently handle stack access not through r10, + * so we won't be able to mark stack slot fp-8 as precise, and so will + * fallback to forcing all as precise + */ +__msg("mark_precise: frame0: falling back to forcing all scalars precise") +__naked int subprog_spill_into_parent_stack_slot_precise(void) +{ + asm volatile ( + "r6 = 1;" + + /* pass pointer to stack slot and r6 to subprog; + * r6 will be marked precise and spilled into fp-8 slot, which + * also should be marked precise + */ + "r1 = r10;" + "r1 += -8;" + "r2 = r6;" + "call subprog_spill_reg_precise;" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r7 = *(u64 *)(r10 - 8);" + + "r7 *= 4;" + "r1 = %[vals];" + /* here r7 is forced to be precise and has to be propagated + * back to the beginning, handling subprog call and logic + */ + "r1 += r7;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6", "r7" + ); +} + +__naked __noinline __used +static __u64 subprog_with_checkpoint(void) +{ + asm volatile ( + "r0 = 0;" + /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ + "goto +0;" + "exit;" + ); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index e1c787815e44..b2dfd7066c6e 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -77,7 +77,9 @@ int rx(struct xdp_md *ctx) } err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (err) + if (!err) + meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + else meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index ea82921110da..4d582cac2c09 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -11,7 +11,6 @@ #include <signal.h> #include <string.h> #include <execinfo.h> /* backtrace */ -#include <linux/membarrier.h> #include <sys/sysinfo.h> /* get_nprocs */ #include <netinet/in.h> #include <sys/select.h> @@ -629,68 +628,6 @@ out: return err; } -static int finit_module(int fd, const char *param_values, int flags) -{ - return syscall(__NR_finit_module, fd, param_values, flags); -} - -static int delete_module(const char *name, int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - -/* - * Trigger synchronize_rcu() in kernel. - */ -int kern_sync_rcu(void) -{ - return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); -} - -static void unload_bpf_testmod(void) -{ - if (kern_sync_rcu()) - fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); - if (delete_module("bpf_testmod", 0)) { - if (errno == ENOENT) { - if (verbose()) - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); - return; - } - fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); - return; - } - if (verbose()) - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); -} - -static int load_bpf_testmod(void) -{ - int fd; - - /* ensure previous instance of the module is unloaded */ - unload_bpf_testmod(); - - if (verbose()) - fprintf(stdout, "Loading bpf_testmod.ko...\n"); - - fd = open("bpf_testmod.ko", O_RDONLY); - if (fd < 0) { - fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); - return -ENOENT; - } - if (finit_module(fd, "", 0)) { - fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); - close(fd); - return -EINVAL; - } - close(fd); - - if (verbose()) - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); - return 0; -} - /* extern declarations for test funcs */ #define DEFINE_TEST(name) \ extern void test_##name(void) __weak; \ @@ -714,7 +651,13 @@ static struct test_state test_states[ARRAY_SIZE(prog_test_defs)]; const char *argp_program_version = "test_progs 0.1"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; -static const char argp_program_doc[] = "BPF selftests test runner"; +static const char argp_program_doc[] = +"BPF selftests test runner\v" +"Options accepting the NAMES parameter take either a comma-separated list\n" +"of test names, or a filename prefixed with @. The file contains one name\n" +"(or wildcard pattern) per line, and comments beginning with # are ignored.\n" +"\n" +"These options can be passed repeatedly to read multiple files.\n"; enum ARG_KEYS { ARG_TEST_NUM = 'n', @@ -797,6 +740,7 @@ extern int extra_prog_load_log_flags; static error_t parse_arg(int key, char *arg, struct argp_state *state) { struct test_env *env = state->input; + int err = 0; switch (key) { case ARG_TEST_NUM: { @@ -821,18 +765,28 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } case ARG_TEST_NAME_GLOB_ALLOWLIST: case ARG_TEST_NAME: { - if (parse_test_list(arg, - &env->test_selector.whitelist, - key == ARG_TEST_NAME_GLOB_ALLOWLIST)) - return -ENOMEM; + if (arg[0] == '@') + err = parse_test_list_file(arg + 1, + &env->test_selector.whitelist, + key == ARG_TEST_NAME_GLOB_ALLOWLIST); + else + err = parse_test_list(arg, + &env->test_selector.whitelist, + key == ARG_TEST_NAME_GLOB_ALLOWLIST); + break; } case ARG_TEST_NAME_GLOB_DENYLIST: case ARG_TEST_NAME_BLACKLIST: { - if (parse_test_list(arg, - &env->test_selector.blacklist, - key == ARG_TEST_NAME_GLOB_DENYLIST)) - return -ENOMEM; + if (arg[0] == '@') + err = parse_test_list_file(arg + 1, + &env->test_selector.blacklist, + key == ARG_TEST_NAME_GLOB_DENYLIST); + else + err = parse_test_list(arg, + &env->test_selector.blacklist, + key == ARG_TEST_NAME_GLOB_DENYLIST); + break; } case ARG_VERIFIER_STATS: @@ -900,7 +854,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) default: return ARGP_ERR_UNKNOWN; } - return 0; + return err; } /* @@ -1703,9 +1657,14 @@ int main(int argc, char **argv) env.stderr = stderr; env.has_testmod = true; - if (!env.list_test_names && load_bpf_testmod()) { - fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); - env.has_testmod = false; + if (!env.list_test_names) { + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(verbose()); + + if (load_bpf_testmod(verbose())) { + fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); + env.has_testmod = false; + } } /* initializing tests */ @@ -1802,7 +1761,7 @@ int main(int argc, char **argv) close(env.saved_netns_fd); out: if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); + unload_bpf_testmod(verbose()); free_test_selector(&env.test_selector); free_test_selector(&env.subtest_selector); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0ed3134333d4..77bd492c6024 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -405,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr) int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); -int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz); int write_sysctl(const char *sysctl, const char *value); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e4657c5bc3f1..71704a38cac3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -40,6 +40,7 @@ #include "bpf_util.h" #include "test_btf.h" #include "../../../include/linux/filter.h" +#include "testing_helpers.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -873,8 +874,140 @@ static int create_map_kptr(void) return fd; } +static void set_root(bool set) +{ + __u64 caps; + + if (set) { + if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps)) + perror("cap_disable_effective(CAP_SYS_ADMIN)"); + } else { + if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) + perror("cap_disable_effective(CAP_SYS_ADMIN)"); + } +} + +static __u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t) ptr; +} + +static struct btf *btf__load_testmod_btf(struct btf *vmlinux) +{ + struct bpf_btf_info info; + __u32 len = sizeof(info); + struct btf *btf = NULL; + char name[64]; + __u32 id = 0; + int err, fd; + + /* Iterate all loaded BTF objects and find bpf_testmod, + * we need SYS_ADMIN cap for that. + */ + set_root(true); + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + perror("bpf_btf_get_next_id failed"); + break; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + perror("bpf_btf_get_fd_by_id failed"); + break; + } + + memset(&info, 0, sizeof(info)); + info.name_len = sizeof(name); + info.name = ptr_to_u64(name); + len = sizeof(info); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + close(fd); + perror("bpf_obj_get_info_by_fd failed"); + break; + } + + if (strcmp("bpf_testmod", name)) { + close(fd); + continue; + } + + btf = btf__load_from_kernel_by_id_split(id, vmlinux); + if (!btf) { + close(fd); + break; + } + + /* We need the fd to stay open so it can be used in fd_array. + * The final cleanup call to btf__free will free btf object + * and close the file descriptor. + */ + btf__set_fd(btf, fd); + break; + } + + set_root(false); + return btf; +} + +static struct btf *testmod_btf; +static struct btf *vmlinux_btf; + +static void kfuncs_cleanup(void) +{ + btf__free(testmod_btf); + btf__free(vmlinux_btf); +} + +static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array, + struct kfunc_btf_id_pair *fixup_kfunc_btf_id) +{ + /* Patch in kfunc BTF IDs */ + while (fixup_kfunc_btf_id->kfunc) { + int btf_id = 0; + + /* try to find kfunc in kernel BTF */ + vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf(); + if (vmlinux_btf) { + btf_id = btf__find_by_name_kind(vmlinux_btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + } + + /* kfunc not found in kernel BTF, try bpf_testmod BTF */ + if (!btf_id) { + testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf); + if (testmod_btf) { + btf_id = btf__find_by_name_kind(testmod_btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + if (btf_id) { + /* We put bpf_testmod module fd into fd_array + * and its index 1 into instruction 'off'. + */ + *fd_array = btf__fd(testmod_btf); + prog[fixup_kfunc_btf_id->insn_idx].off = 1; + } + } + } + + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; + fixup_kfunc_btf_id++; + } +} + static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, - struct bpf_insn *prog, int *map_fds) + struct bpf_insn *prog, int *map_fds, int *fd_array) { int *fixup_map_hash_8b = test->fixup_map_hash_8b; int *fixup_map_hash_48b = test->fixup_map_hash_48b; @@ -899,7 +1032,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; int *fixup_map_kptr = test->fixup_map_kptr; - struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -1100,25 +1232,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, } while (*fixup_map_kptr); } - /* Patch in kfunc BTF IDs */ - if (fixup_kfunc_btf_id->kfunc) { - struct btf *btf; - int btf_id; - - do { - btf_id = 0; - btf = btf__load_vmlinux_btf(); - if (btf) { - btf_id = btf__find_by_name_kind(btf, - fixup_kfunc_btf_id->kfunc, - BTF_KIND_FUNC); - btf_id = btf_id < 0 ? 0 : btf_id; - } - btf__free(btf); - prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; - fixup_kfunc_btf_id++; - } while (fixup_kfunc_btf_id->kfunc); - } + fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); } struct libcap { @@ -1445,6 +1559,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int fd_array[2] = { -1, -1 }; int saved_errno; int fixup_skips; __u32 pflags; @@ -1458,7 +1573,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; fixup_skips = skips; - do_test_fixup(test, prog_type, prog, map_fds); + do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]); if (test->fill_insns) { prog = test->fill_insns; prog_len = test->prog_len; @@ -1492,6 +1607,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, else opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; + if (fd_array[1] != -1) + opts.fd_array = &fd_array[0]; if ((prog_type == BPF_PROG_TYPE_TRACING || prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) { @@ -1684,6 +1801,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0; + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(verbose); + + if (load_bpf_testmod(verbose)) + return EXIT_FAILURE; + for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; @@ -1711,6 +1834,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) } } + unload_bpf_testmod(verbose); + kfuncs_cleanup(); + printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, skips, errors); return errors ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 377fb157a57c..c2ad50f26b63 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -68,9 +68,6 @@ # Run with verbose output: # sudo ./test_xsk.sh -v # -# Run and dump packet contents: -# sudo ./test_xsk.sh -D -# # Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger: # sudo ./test_xsk.sh -d # @@ -81,11 +78,10 @@ ETH="" -while getopts "vDi:d" flag +while getopts "vi:d" flag do case "${flag}" in v) verbose=1;; - D) dump_pkts=1;; d) debug=1;; i) ETH=${OPTARG};; esac @@ -157,10 +153,6 @@ if [[ $verbose -eq 1 ]]; then ARGS+="-v " fi -if [[ $dump_pkts -eq 1 ]]; then - ARGS="-D " -fi - retval=$? test_status $retval "${TEST_NAME}" diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 0b5e0829e5be..8d994884c7b4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (C) 2019 Netronome Systems, Inc. */ /* Copyright (C) 2020 Facebook, Inc. */ +#include <ctype.h> #include <stdlib.h> #include <string.h> #include <errno.h> @@ -8,6 +9,7 @@ #include <bpf/libbpf.h> #include "test_progs.h" #include "testing_helpers.h" +#include <linux/membarrier.h> int parse_num_list(const char *s, bool **num_set, int *num_set_len) { @@ -70,92 +72,168 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) return 0; } -int parse_test_list(const char *s, - struct test_filter_set *set, - bool is_glob_pattern) +static int do_insert_test(struct test_filter_set *set, + char *test_str, + char *subtest_str) { - char *input, *state = NULL, *next; - struct test_filter *tmp, *tests = NULL; - int i, j, cnt = 0; + struct test_filter *tmp, *test; + char **ctmp; + int i; - input = strdup(s); - if (!input) + for (i = 0; i < set->cnt; i++) { + test = &set->tests[i]; + + if (strcmp(test_str, test->name) == 0) { + free(test_str); + goto subtest; + } + } + + tmp = realloc(set->tests, sizeof(*test) * (set->cnt + 1)); + if (!tmp) return -ENOMEM; - while ((next = strtok_r(state ? NULL : input, ",", &state))) { - char *subtest_str = strchr(next, '/'); - char *pattern = NULL; - int glob_chars = 0; + set->tests = tmp; + test = &set->tests[set->cnt]; - tmp = realloc(tests, sizeof(*tests) * (cnt + 1)); - if (!tmp) - goto err; - tests = tmp; + test->name = test_str; + test->subtests = NULL; + test->subtest_cnt = 0; - tests[cnt].subtest_cnt = 0; - tests[cnt].subtests = NULL; + set->cnt++; - if (is_glob_pattern) { - pattern = "%s"; - } else { - pattern = "*%s*"; - glob_chars = 2; - } +subtest: + if (!subtest_str) + return 0; - if (subtest_str) { - char **tmp_subtests = NULL; - int subtest_cnt = tests[cnt].subtest_cnt; - - *subtest_str = '\0'; - subtest_str += 1; - tmp_subtests = realloc(tests[cnt].subtests, - sizeof(*tmp_subtests) * - (subtest_cnt + 1)); - if (!tmp_subtests) - goto err; - tests[cnt].subtests = tmp_subtests; - - tests[cnt].subtests[subtest_cnt] = - malloc(strlen(subtest_str) + glob_chars + 1); - if (!tests[cnt].subtests[subtest_cnt]) - goto err; - sprintf(tests[cnt].subtests[subtest_cnt], - pattern, - subtest_str); - - tests[cnt].subtest_cnt++; + for (i = 0; i < test->subtest_cnt; i++) { + if (strcmp(subtest_str, test->subtests[i]) == 0) { + free(subtest_str); + return 0; } + } - tests[cnt].name = malloc(strlen(next) + glob_chars + 1); - if (!tests[cnt].name) - goto err; - sprintf(tests[cnt].name, pattern, next); + ctmp = realloc(test->subtests, + sizeof(*test->subtests) * (test->subtest_cnt + 1)); + if (!ctmp) + return -ENOMEM; - cnt++; + test->subtests = ctmp; + test->subtests[test->subtest_cnt] = subtest_str; + + test->subtest_cnt++; + + return 0; +} + +static int insert_test(struct test_filter_set *set, + char *test_spec, + bool is_glob_pattern) +{ + char *pattern, *subtest_str, *ext_test_str, *ext_subtest_str = NULL; + int glob_chars = 0; + + if (is_glob_pattern) { + pattern = "%s"; + } else { + pattern = "*%s*"; + glob_chars = 2; } - tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt)); - if (!tmp) + subtest_str = strchr(test_spec, '/'); + if (subtest_str) { + *subtest_str = '\0'; + subtest_str += 1; + } + + ext_test_str = malloc(strlen(test_spec) + glob_chars + 1); + if (!ext_test_str) goto err; - memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt); - set->tests = tmp; - set->cnt += cnt; + sprintf(ext_test_str, pattern, test_spec); - free(tests); - free(input); - return 0; + if (subtest_str) { + ext_subtest_str = malloc(strlen(subtest_str) + glob_chars + 1); + if (!ext_subtest_str) + goto err; + + sprintf(ext_subtest_str, pattern, subtest_str); + } + + return do_insert_test(set, ext_test_str, ext_subtest_str); err: - for (i = 0; i < cnt; i++) { - for (j = 0; j < tests[i].subtest_cnt; j++) - free(tests[i].subtests[j]); + free(ext_test_str); + free(ext_subtest_str); + + return -ENOMEM; +} + +int parse_test_list_file(const char *path, + struct test_filter_set *set, + bool is_glob_pattern) +{ + char *buf = NULL, *capture_start, *capture_end, *scan_end; + size_t buflen = 0; + int err = 0; + FILE *f; + + f = fopen(path, "r"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open '%s': %d\n", path, err); + return err; + } + + while (getline(&buf, &buflen, f) != -1) { + capture_start = buf; + + while (isspace(*capture_start)) + ++capture_start; + + capture_end = capture_start; + scan_end = capture_start; + + while (*scan_end && *scan_end != '#') { + if (!isspace(*scan_end)) + capture_end = scan_end; + + ++scan_end; + } + + if (capture_end == capture_start) + continue; + + *(++capture_end) = '\0'; + + err = insert_test(set, capture_start, is_glob_pattern); + if (err) + break; + } + + fclose(f); + return err; +} + +int parse_test_list(const char *s, + struct test_filter_set *set, + bool is_glob_pattern) +{ + char *input, *state = NULL, *test_spec; + int err = 0; + + input = strdup(s); + if (!input) + return -ENOMEM; - free(tests[i].name); + while ((test_spec = strtok_r(state ? NULL : input, ",", &state))) { + err = insert_test(set, test_spec, is_glob_pattern); + if (err) + break; } - free(tests); + free(input); - return -ENOMEM; + return err; } __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) @@ -249,3 +327,63 @@ __u64 read_perf_max_sample_freq(void) fclose(f); return sample_freq; } + +static int finit_module(int fd, const char *param_values, int flags) +{ + return syscall(__NR_finit_module, fd, param_values, flags); +} + +static int delete_module(const char *name, int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + +int unload_bpf_testmod(bool verbose) +{ + if (kern_sync_rcu()) + fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); + if (delete_module("bpf_testmod", 0)) { + if (errno == ENOENT) { + if (verbose) + fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); + return -1; + } + fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); + return -1; + } + if (verbose) + fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); + return 0; +} + +int load_bpf_testmod(bool verbose) +{ + int fd; + + if (verbose) + fprintf(stdout, "Loading bpf_testmod.ko...\n"); + + fd = open("bpf_testmod.ko", O_RDONLY); + if (fd < 0) { + fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); + return -ENOENT; + } + if (finit_module(fd, "", 0)) { + fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); + close(fd); + return -EINVAL; + } + close(fd); + + if (verbose) + fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); + return 0; +} + +/* + * Trigger synchronize_rcu() in kernel. + */ +int kern_sync_rcu(void) +{ + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index eb8790f928e4..5312323881b6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -1,5 +1,9 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (C) 2020 Facebook, Inc. */ + +#ifndef __TESTING_HELPERS_H +#define __TESTING_HELPERS_H + #include <stdbool.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -20,5 +24,13 @@ struct test_filter_set; int parse_test_list(const char *s, struct test_filter_set *test_set, bool is_glob_pattern); +int parse_test_list_file(const char *path, + struct test_filter_set *test_set, + bool is_glob_pattern); __u64 read_perf_max_sample_freq(void); +int load_bpf_testmod(bool verbose); +int unload_bpf_testmod(bool verbose); +int kern_sync_rcu(void); + +#endif /* __TESTING_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 6c03a7d805f9..b8c0aae8e7ec 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -38,25 +38,24 @@ .fixup_map_array_48b = { 1 }, .result = VERBOSE_ACCEPT, .errstr = - "26: (85) call bpf_probe_read_kernel#113\ - last_idx 26 first_idx 20\ - regs=4 stack=0 before 25\ - regs=4 stack=0 before 24\ - regs=4 stack=0 before 23\ - regs=4 stack=0 before 22\ - regs=4 stack=0 before 20\ - parent didn't have regs=4 stack=0 marks\ - last_idx 19 first_idx 10\ - regs=4 stack=0 before 19\ - regs=200 stack=0 before 18\ - regs=300 stack=0 before 17\ - regs=201 stack=0 before 15\ - regs=201 stack=0 before 14\ - regs=200 stack=0 before 13\ - regs=200 stack=0 before 12\ - regs=200 stack=0 before 11\ - regs=200 stack=0 before 10\ - parent already had regs=0 stack=0 marks", + "mark_precise: frame0: last_idx 26 first_idx 20\ + mark_precise: frame0: regs=r2 stack= before 25\ + mark_precise: frame0: regs=r2 stack= before 24\ + mark_precise: frame0: regs=r2 stack= before 23\ + mark_precise: frame0: regs=r2 stack= before 22\ + mark_precise: frame0: regs=r2 stack= before 20\ + mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: last_idx 19 first_idx 10\ + mark_precise: frame0: regs=r2 stack= before 19\ + mark_precise: frame0: regs=r9 stack= before 18\ + mark_precise: frame0: regs=r8,r9 stack= before 17\ + mark_precise: frame0: regs=r0,r9 stack= before 15\ + mark_precise: frame0: regs=r0,r9 stack= before 14\ + mark_precise: frame0: regs=r9 stack= before 13\ + mark_precise: frame0: regs=r9 stack= before 12\ + mark_precise: frame0: regs=r9 stack= before 11\ + mark_precise: frame0: regs=r9 stack= before 10\ + mark_precise: frame0: parent state regs= stack=:", }, { "precise: test 2", @@ -100,20 +99,20 @@ .flags = BPF_F_TEST_STATE_FREQ, .errstr = "26: (85) call bpf_probe_read_kernel#113\ - last_idx 26 first_idx 22\ - regs=4 stack=0 before 25\ - regs=4 stack=0 before 24\ - regs=4 stack=0 before 23\ - regs=4 stack=0 before 22\ - parent didn't have regs=4 stack=0 marks\ - last_idx 20 first_idx 20\ - regs=4 stack=0 before 20\ - parent didn't have regs=4 stack=0 marks\ - last_idx 19 first_idx 17\ - regs=4 stack=0 before 19\ - regs=200 stack=0 before 18\ - regs=300 stack=0 before 17\ - parent already had regs=0 stack=0 marks", + mark_precise: frame0: last_idx 26 first_idx 22\ + mark_precise: frame0: regs=r2 stack= before 25\ + mark_precise: frame0: regs=r2 stack= before 24\ + mark_precise: frame0: regs=r2 stack= before 23\ + mark_precise: frame0: regs=r2 stack= before 22\ + mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: last_idx 20 first_idx 20\ + mark_precise: frame0: regs=r2 stack= before 20\ + mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: last_idx 19 first_idx 17\ + mark_precise: frame0: regs=r2 stack= before 19\ + mark_precise: frame0: regs=r9 stack= before 18\ + mark_precise: frame0: regs=r8,r9 stack= before 17\ + mark_precise: frame0: parent state regs= stack=:", }, { "precise: cross frame pruning", @@ -153,15 +152,16 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "5: (2d) if r4 > r0 goto pc+0\ - last_idx 5 first_idx 5\ - parent didn't have regs=10 stack=0 marks\ - last_idx 4 first_idx 2\ - regs=10 stack=0 before 4\ - regs=10 stack=0 before 3\ - regs=0 stack=1 before 2\ - last_idx 5 first_idx 5\ - parent didn't have regs=1 stack=0 marks", + .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\ + mark_precise: frame0: parent state regs=r4 stack=:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs=r4 stack= before 3\ + mark_precise: frame0: regs= stack=-8 before 2\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ + mark_precise: frame0: last_idx 5 first_idx 5\ + mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -179,16 +179,19 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "last_idx 6 first_idx 6\ - parent didn't have regs=10 stack=0 marks\ - last_idx 5 first_idx 3\ - regs=10 stack=0 before 5\ - regs=10 stack=0 before 4\ - regs=0 stack=1 before 3\ - last_idx 6 first_idx 6\ - parent didn't have regs=1 stack=0 marks\ - last_idx 5 first_idx 3\ - regs=1 stack=0 before 5", + .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: parent state regs=r4 stack=:\ + mark_precise: frame0: last_idx 5 first_idx 3\ + mark_precise: frame0: regs=r4 stack= before 5\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs= stack=-8 before 3\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -217,3 +220,39 @@ .errstr = "invalid access to memory, mem_size=1 off=42 size=8", .result = REJECT, }, +{ + "precise: program doesn't prematurely prune branches", + .insns = { + BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0), + BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_LD_MAP_FD(BPF_REG_4, 0), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 10), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8192), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 13 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "register with unbounded min value is not allowed", +}, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1db7185181da..655095810d4a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -141,6 +141,7 @@ static struct env { bool verbose; bool debug; bool quiet; + bool force_checkpoints; enum resfmt out_fmt; bool show_version; bool comparison_mode; @@ -209,6 +210,8 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -284,6 +287,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) argp_usage(state); } break; + case 't': + env.force_checkpoints = true; + break; case 'C': env.comparison_mode = true; break; @@ -989,6 +995,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf /* increase chances of successful BPF object loading */ fixup_obj(obj, prog, base_filename); + if (env.force_checkpoints) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + err = bpf_object__load(obj); env.progs_processed++; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 987cf0db5ebc..613321eb84c1 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -27,6 +27,7 @@ #include <sys/mman.h> #include <net/if.h> #include <poll.h> +#include <time.h> #include "xdp_metadata.h" @@ -134,18 +135,52 @@ static void refill_rx(struct xsk *xsk, __u64 addr) } } -static void verify_xdp_metadata(void *data) +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ +static __u64 gettime(clockid_t clock_id) +{ + struct timespec t; + int res; + + /* See man clock_gettime(2) for type of clock_id's */ + res = clock_gettime(clock_id, &t); + + if (res < 0) + error(res, errno, "Error with clock_gettime()"); + + return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - printf("rx_timestamp: %llu\n", meta->rx_timestamp); if (meta->rx_hash_err < 0) printf("No rx_hash err=%d\n", meta->rx_hash_err); else printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + + printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, + (double)meta->rx_timestamp / NANOSEC_PER_SEC); + if (meta->rx_timestamp) { + __u64 usr_clock = gettime(clock_id); + __u64 xdp_clock = meta->xdp_timestamp; + __s64 delta_X = xdp_clock - meta->rx_timestamp; + __s64 delta_X2U = usr_clock - xdp_clock; + + printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", + xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, + (double)delta_X / NANOSEC_PER_SEC, + (double)delta_X / 1000); + + printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", + usr_clock, (double)usr_clock / NANOSEC_PER_SEC, + (double)delta_X2U / NANOSEC_PER_SEC, + (double)delta_X2U / 1000); + } + } static void verify_skb_metadata(int fd) @@ -193,7 +228,7 @@ static void verify_skb_metadata(int fd) printf("skb hwtstamp is not found!\n"); } -static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) +static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; struct pollfd fds[rxq + 1]; @@ -243,7 +278,8 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) addr = xsk_umem__add_offset_to_addr(rx_desc->addr); printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", xsk, idx, rx_desc->addr, addr, comp_addr); - verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr)); + verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), + clock_id); xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); } @@ -370,6 +406,7 @@ static void timestamping_enable(int fd, int val) int main(int argc, char *argv[]) { + clockid_t clock_id = CLOCK_TAI; int server_fd = -1; int ret; int i; @@ -443,7 +480,7 @@ int main(int argc, char *argv[]) error(1, -ret, "bpf_xdp_attach"); signal(SIGINT, handle_signal); - ret = verify_metadata(rx_xsk, rxq, server_fd); + ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id); close(server_fd); cleanup(); if (ret) diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 0c4624dc6f2f..938a729bd307 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -11,6 +11,7 @@ struct xdp_meta { __u64 rx_timestamp; + __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 04ed8b544712..8da8d557768b 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -134,6 +134,11 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE); } +static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb) +{ + prod->cached_prod -= nb; +} + static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) { __u32 entries = xsk_cons_nb_avail(cons, nb); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index f144d0604ddf..218d7f694e5c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -76,16 +76,13 @@ #include <asm/barrier.h> #include <linux/if_link.h> #include <linux/if_ether.h> -#include <linux/ip.h> #include <linux/mman.h> -#include <linux/udp.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> #include <poll.h> #include <pthread.h> #include <signal.h> -#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -94,10 +91,8 @@ #include <sys/socket.h> #include <sys/time.h> #include <sys/types.h> -#include <sys/queue.h> #include <time.h> #include <unistd.h> -#include <stdatomic.h> #include "xsk_xdp_progs.skel.h" #include "xsk.h" @@ -109,10 +104,6 @@ static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; -static const char *IP1 = "192.168.100.162"; -static const char *IP2 = "192.168.100.161"; -static const u16 UDP_PORT1 = 2020; -static const u16 UDP_PORT2 = 2121; static void __exit_with_error(int error, const char *file, const char *func, int line) { @@ -147,112 +138,25 @@ static void report_failure(struct test_spec *test) test->fail = true; } -static void memset32_htonl(void *dest, u32 val, u32 size) -{ - u32 *ptr = (u32 *)dest; - int i; - - val = htonl(val); - - for (i = 0; i < (size & (~0x3)); i += 4) - ptr[i >> 2] = val; -} - -/* - * Fold a partial checksum - * This function code has been taken from - * Linux kernel include/asm-generic/checksum.h - */ -static __u16 csum_fold(__u32 csum) -{ - u32 sum = (__force u32)csum; - - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __u16)~sum; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static u32 from64to32(u64 x) -{ - /* add up 32-bit and 32-bit for 32+c bit */ - x = (x & 0xffffffff) + (x >> 32); - /* add up carry.. */ - x = (x & 0xffffffff) + (x >> 32); - return (u32)x; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - unsigned long long s = (__force u32)sum; - - s += (__force u32)saddr; - s += (__force u32)daddr; -#ifdef __BIG_ENDIAN__ - s += proto + len; -#else - s += (proto + len) << 8; -#endif - return (__force __u32)from64to32(s); -} - -/* - * This function has been taken from - * Linux kernel include/asm-generic/checksum.h +/* The payload is a word consisting of a packet sequence number in the upper + * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. */ -static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); -} - -static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) +static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) { - u32 csum = 0; - u32 cnt = 0; - - /* udp hdr and data */ - for (; cnt < len; cnt += 2) - csum += udp_pkt[cnt >> 1]; + u32 *ptr = (u32 *)dest, i; - return csum_tcpudp_magic(saddr, daddr, len, proto, csum); + start /= sizeof(*ptr); + size /= sizeof(*ptr); + for (i = 0; i < size; i++) + ptr[i] = htonl(pkt_nb << 16 | (i + start)); } static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) { memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_IP); -} - -static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) -{ - ip_hdr->version = IP_PKT_VER; - ip_hdr->ihl = 0x5; - ip_hdr->tos = IP_PKT_TOS; - ip_hdr->tot_len = htons(IP_PKT_SIZE); - ip_hdr->id = 0; - ip_hdr->frag_off = 0; - ip_hdr->ttl = IPDEFTTL; - ip_hdr->protocol = IPPROTO_UDP; - ip_hdr->saddr = ifobject->src_ip; - ip_hdr->daddr = ifobject->dst_ip; - ip_hdr->check = 0; -} - -static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, - struct udphdr *udp_hdr) -{ - udp_hdr->source = htons(ifobject->src_port); - udp_hdr->dest = htons(ifobject->dst_port); - udp_hdr->len = htons(UDP_PKT_SIZE); - memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); } static bool is_umem_valid(struct ifobject *ifobj) @@ -260,19 +164,18 @@ static bool is_umem_valid(struct ifobject *ifobj) return !!ifobj->umem->umem; } -static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) +static u32 mode_to_xdp_flags(enum test_mode mode) { - udp_hdr->check = 0; - udp_hdr->check = - udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); + return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; } -static u32 mode_to_xdp_flags(enum test_mode mode) +static u64 umem_size(struct xsk_umem_info *umem) { - return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; + return umem->num_frames * umem->frame_size; } -static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) +static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, + u64 size) { struct xsk_umem_config cfg = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, @@ -292,9 +195,31 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size return ret; umem->buffer = buffer; + if (ifobj->shared_umem && ifobj->rx_on) { + umem->base_addr = umem_size(umem); + umem->next_buffer = umem_size(umem); + } + return 0; } +static u64 umem_alloc_buffer(struct xsk_umem_info *umem) +{ + u64 addr; + + addr = umem->next_buffer; + umem->next_buffer += umem->frame_size; + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) + umem->next_buffer = umem->base_addr; + + return addr; +} + +static void umem_reset_alloc(struct xsk_umem_info *umem) +{ + umem->next_buffer = 0; +} + static void enable_busy_poll(struct xsk_socket_info *xsk) { int sock_opt; @@ -354,7 +279,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject) exit_with_error(ENOMEM); } umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - ret = xsk_configure_umem(umem, bufs, umem_sz); + ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz); if (ret) exit_with_error(-ret); @@ -380,7 +305,6 @@ out: static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, - {"dump-pkts", no_argument, 0, 'D'}, {"verbose", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -391,7 +315,6 @@ static void usage(const char *prog) " Usage: %s [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" - " -D, --dump-pkts Dump packets L2 - L5\n" " -v, --verbose Verbose output\n" " -b, --busy-poll Enable busy poll\n"; @@ -415,7 +338,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); + c = getopt_long(argc, argv, "i:vb", long_options, &option_index); if (c == -1) break; @@ -437,9 +360,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj interface_nb++; break; - case 'D': - opt_pkt_dump = true; - break; case 'v': opt_verbose = true; break; @@ -482,9 +402,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, memset(ifobj->umem, 0, sizeof(*ifobj->umem)); ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - if (ifobj->shared_umem && ifobj->rx_on) - ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS * - XSK_UMEM__DEFAULT_FRAME_SIZE; for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); @@ -554,24 +471,24 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x static void pkt_stream_reset(struct pkt_stream *pkt_stream) { if (pkt_stream) - pkt_stream->rx_pkt_nb = 0; + pkt_stream->current_pkt_nb = 0; } -static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) +static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) { - if (pkt_nb >= pkt_stream->nb_pkts) + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) return NULL; - return &pkt_stream->pkts[pkt_nb]; + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; } static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) { - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { (*pkts_sent)++; - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; - pkt_stream->rx_pkt_nb++; + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; + pkt_stream->current_pkt_nb++; } return NULL; } @@ -616,9 +533,21 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) return pkt_stream; } -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) +static u32 ceil_u32(u32 a, u32 b) +{ + return (a + b - 1) / b; +} + +static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt) +{ + if (!pkt || !pkt->valid) + return 1; + return ceil_u32(pkt->len, frame_size); +} + +static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) { - pkt->addr = addr + umem->base_addr; + pkt->offset = offset; pkt->len = len; if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) pkt->valid = false; @@ -626,6 +555,11 @@ static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 l pkt->valid = true; } +static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) +{ + return ceil_u32(len, umem->frame_size) * umem->frame_size; +} + static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; @@ -635,10 +569,13 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb if (!pkt_stream) exit_with_error(ENOMEM); + pkt_stream->nb_pkts = nb_pkts; + pkt_stream->max_pkt_len = pkt_len; for (i = 0; i < nb_pkts; i++) { - pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, - pkt_len); - pkt_stream->pkts[i].payload = i; + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt_set(umem, pkt, 0, pkt_len); + pkt->pkt_nb = i; } return pkt_stream; @@ -669,8 +606,7 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], - (i % umem->num_frames) * umem->frame_size + offset, pkt_len); + pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); ifobj->pkt_stream = pkt_stream; } @@ -694,30 +630,31 @@ static void pkt_stream_receive_half(struct test_spec *test) pkt_stream->pkts[i].valid = false; } -static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) +static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) { - struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); - struct udphdr *udp_hdr; - struct ethhdr *eth_hdr; - struct iphdr *ip_hdr; - void *data; + if (!pkt->valid) + return pkt->offset; + return pkt->offset + umem_alloc_buffer(umem); +} - if (!pkt) - return NULL; - if (!pkt->valid || pkt->len < MIN_PKT_SIZE) - return pkt; +static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, + u32 bytes_written) +{ + void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); - data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); - udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); - ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); - eth_hdr = (struct ethhdr *)data; + if (len < MIN_PKT_SIZE) + return; - gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); - gen_ip_hdr(ifobject, ip_hdr); - gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr(ifobject, eth_hdr); + if (!bytes_written) { + gen_eth_hdr(ifobject, data); - return pkt; + len -= PKT_HDR_SIZE; + data += PKT_HDR_SIZE; + } else { + bytes_written -= PKT_HDR_SIZE; + } + + write_payload(data, pkt_nb, bytes_written, len); } static void __pkt_stream_generate_custom(struct ifobject *ifobj, @@ -731,10 +668,14 @@ static void __pkt_stream_generate_custom(struct ifobject *ifobj, exit_with_error(ENOMEM); for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr; - pkt_stream->pkts[i].len = pkts[i].len; - pkt_stream->pkts[i].payload = i; - pkt_stream->pkts[i].valid = pkts[i].valid; + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt->offset = pkts[i].offset; + pkt->len = pkts[i].len; + pkt->pkt_nb = i; + pkt->valid = pkts[i].valid; + if (pkt->len > pkt_stream->max_pkt_len) + pkt_stream->max_pkt_len = pkt->len; } ifobj->pkt_stream = pkt_stream; @@ -746,53 +687,62 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts); } -static void pkt_dump(void *pkt, u32 len) -{ - char s[INET_ADDRSTRLEN]; - struct ethhdr *ethhdr; - struct udphdr *udphdr; - struct iphdr *iphdr; - u32 payload, i; - - ethhdr = pkt; - iphdr = pkt + sizeof(*ethhdr); - udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); - - /*extract L2 frame */ - fprintf(stdout, "DEBUG>> L2: dst mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_dest[i]); - - fprintf(stdout, "\nDEBUG>> L2: src mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_source[i]); - - /*extract L3 frame */ - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", - inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", - inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); - /*extract L4 frame */ - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); - /*extract L5 frame */ - payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE))); +static void pkt_print_data(u32 *data, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + u32 seqnum, pkt_nb; - fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); - fprintf(stdout, "---------------------------------------\n"); + seqnum = ntohl(*data) & 0xffff; + pkt_nb = ntohl(*data) >> 16; + fprintf(stdout, "%u:%u ", pkt_nb, seqnum); + data++; + } } -static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, - u64 pkt_stream_addr) +static void pkt_dump(void *pkt, u32 len, bool eth_header) +{ + struct ethhdr *ethhdr = pkt; + u32 i, *data; + + if (eth_header) { + /*extract L2 frame */ + fprintf(stdout, "DEBUG>> L2: dst mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_dest[i]); + + fprintf(stdout, "\nDEBUG>> L2: src mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_source[i]); + + data = pkt + PKT_HDR_SIZE; + } else { + data = pkt; + } + + /*extract L5 frame */ + fprintf(stdout, "\nDEBUG>> L5: seqnum: "); + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); + fprintf(stdout, "...."); + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { + fprintf(stdout, "\n.... "); + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, + PKT_DUMP_NB_TO_PRINT); + } + fprintf(stdout, "\n---------------------------------------\n"); +} + +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) { u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; - u32 offset = addr % umem->frame_size, expected_offset = 0; + u32 offset = addr % umem->frame_size, expected_offset; + int pkt_offset = pkt->valid ? pkt->offset : 0; - if (!pkt_stream->use_addr_for_fill) - pkt_stream_addr = 0; + if (!umem->unaligned_mode) + pkt_offset = 0; - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; if (offset == expected_offset) return true; @@ -806,9 +756,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) void *data = xsk_umem__get_data(buffer, addr); struct xdp_info *meta = data - sizeof(struct xdp_info); - if (meta->count != pkt->payload) { + if (meta->count != pkt->pkt_nb) { ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->payload, meta->count); + __func__, pkt->pkt_nb, meta->count); return false; } @@ -818,11 +768,11 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) { void *data = xsk_umem__get_data(buffer, addr); - struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + u32 seqnum, pkt_data; if (!pkt) { ksft_print_msg("[%s] too many packets received\n", __func__); - return false; + goto error; } if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { @@ -833,28 +783,23 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) if (pkt->len != len) { ksft_print_msg("[%s] expected length [%d], got length [%d]\n", __func__, pkt->len, len); - return false; + goto error; } - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { - u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); - - if (opt_pkt_dump) - pkt_dump(data, PKT_SIZE); + pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); + seqnum = pkt_data >> 16; - if (pkt->payload != seqnum) { - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", - __func__, pkt->payload, seqnum); - return false; - } - } else { - ksft_print_msg("Invalid frame received: "); - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, - iphdr->tos); - return false; + if (pkt->pkt_nb != seqnum) { + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", + __func__, pkt->pkt_nb, seqnum); + goto error; } return true; + +error: + pkt_dump(data, len, true); + return false; } static void kick_tx(struct xsk_socket_info *xsk) @@ -976,7 +921,7 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) addr = xsk_umem__add_offset_to_addr(addr); if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || - !is_offset_correct(umem, pkt_stream, addr, pkt->addr) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) return TEST_FAILURE; @@ -992,8 +937,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) pthread_mutex_lock(&pacing_mutex); pkts_in_flight -= pkts_sent; - if (pkts_in_flight < umem->num_frames) - pthread_cond_signal(&pacing_cond); pthread_mutex_unlock(&pacing_mutex); pkts_sent = 0; } @@ -1001,14 +944,21 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, - bool timeout) +static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) { struct xsk_socket_info *xsk = ifobject->xsk; + struct xsk_umem_info *umem = ifobject->umem; + u32 i, idx = 0, valid_pkts = 0, buffer_len; bool use_poll = ifobject->use_poll; - u32 i, idx = 0, valid_pkts = 0; int ret; + buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len); + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { + kick_tx(xsk); + return TEST_CONTINUE; + } + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { ret = poll(fds, 1, POLL_TMOUT); @@ -1034,25 +984,21 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd for (i = 0; i < BATCH_SIZE; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); + struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream); if (!pkt) break; - tx_desc->addr = pkt->addr; + tx_desc->addr = pkt_get_addr(pkt, umem); tx_desc->len = pkt->len; - (*pkt_nb)++; - if (pkt->valid) + if (pkt->valid) { valid_pkts++; + pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0); + } } pthread_mutex_lock(&pacing_mutex); pkts_in_flight += valid_pkts; - /* pkts_in_flight might be negative if many invalid packets are sent */ - if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { - kick_tx(xsk); - pthread_cond_wait(&pacing_cond, &pacing_mutex); - } pthread_mutex_unlock(&pacing_mutex); xsk_ring_prod__submit(&xsk->tx, i); @@ -1088,18 +1034,21 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { + struct pkt_stream *pkt_stream = ifobject->pkt_stream; bool timeout = !is_umem_valid(test->ifobj_rx); struct pollfd fds = { }; - u32 pkt_cnt = 0, ret; + u32 ret; fds.fd = xsk_socket__fd(ifobject->xsk->xsk); fds.events = POLLOUT; - while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { + ret = __send_pkts(ifobject, &fds, timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; if ((ret || test->fail) && !timeout) return TEST_FAILURE; - else if (ret == TEST_PASS && timeout) + if (ret == TEST_PASS && timeout) return ret; } @@ -1249,11 +1198,14 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje ifobject->xsk = &ifobject->xsk_arr[0]; ifobject->xskmap = test->ifobj_rx->xskmap; memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); + ifobject->umem->base_addr = 0; } -static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) +static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, + bool fill_up) { - u32 idx = 0, i, buffers_to_fill; + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; int ret; if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) @@ -1264,22 +1216,33 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); if (ret != buffers_to_fill) exit_with_error(ENOSPC); - for (i = 0; i < buffers_to_fill; i++) { - u64 addr; - if (pkt_stream->use_addr_for_fill) { - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); + while (filled < buffers_to_fill) { + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); + u64 addr; + u32 i; + + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) { + if (!pkt) { + if (!fill_up) + break; + addr = filled * umem->frame_size + umem->base_addr; + } else if (pkt->offset >= 0) { + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); + } else { + addr = pkt->offset + umem_alloc_buffer(umem); + } - if (!pkt) + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + if (++filled >= buffers_to_fill) break; - addr = pkt->addr; - } else { - addr = i * umem->frame_size; } - - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; } - xsk_ring_prod__submit(&umem->fq, i); + xsk_ring_prod__submit(&umem->fq, filled); + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); + + pkt_stream_reset(pkt_stream); + umem_reset_alloc(umem); } static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) @@ -1300,12 +1263,10 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (bufs == MAP_FAILED) exit_with_error(errno); - ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); if (ret) exit_with_error(-ret); - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); - xsk_configure_socket(test, ifobject, ifobject->umem, false); ifobject->xsk = &ifobject->xsk_arr[0]; @@ -1313,6 +1274,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); if (ret) exit_with_error(errno); @@ -1370,12 +1333,8 @@ static void *worker_testapp_validate_rx(void *arg) if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); - if (err) { + if (err) report_failure(test); - pthread_mutex_lock(&pacing_mutex); - pthread_cond_signal(&pacing_cond); - pthread_mutex_unlock(&pacing_mutex); - } pthread_exit(NULL); } @@ -1402,11 +1361,20 @@ static void handler(int signum) pthread_exit(NULL); } -static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj) +static bool xdp_prog_changed_rx(struct test_spec *test) { + struct ifobject *ifobj = test->ifobj_rx; + return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; } +static bool xdp_prog_changed_tx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_tx; + + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; +} + static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, struct bpf_map *xskmap, enum test_mode mode) { @@ -1433,13 +1401,13 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, struct ifobject *ifobj_tx) { - if (xdp_prog_changed(test, ifobj_rx)) + if (xdp_prog_changed_rx(test)) xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); if (!ifobj_tx || ifobj_tx->shared_umem) return; - if (xdp_prog_changed(test, ifobj_tx)) + if (xdp_prog_changed_tx(test)) xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); } @@ -1448,9 +1416,11 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i { pthread_t t0, t1; - if (ifobj2) + if (ifobj2) { if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); + pkt_stream_reset(ifobj2->pkt_stream); + } test->current_step++; pkt_stream_reset(ifobj1->pkt_stream); @@ -1493,6 +1463,12 @@ static int testapp_validate_traffic(struct test_spec *test) struct ifobject *ifobj_rx = test->ifobj_rx; struct ifobject *ifobj_tx = test->ifobj_tx; + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { + ksft_test_result_skip("No huge pages present.\n"); + return TEST_SKIP; + } + xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); } @@ -1502,16 +1478,18 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct return __testapp_validate_traffic(test, ifobj, NULL); } -static void testapp_teardown(struct test_spec *test) +static int testapp_teardown(struct test_spec *test) { int i; test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; test_spec_reset(test); } + + return TEST_PASS; } static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) @@ -1526,20 +1504,23 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) *ifobj2 = tmp_ifobj; } -static void testapp_bidi(struct test_spec *test) +static int testapp_bidi(struct test_spec *test) { + int res; + test_spec_set_name(test, "BIDIRECTIONAL"); test->ifobj_tx->rx_on = true; test->ifobj_rx->tx_on = true; test->total_steps = 2; if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; print_verbose("Switching Tx/Rx vectors\n"); swap_directions(&test->ifobj_rx, &test->ifobj_tx); - __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); swap_directions(&test->ifobj_rx, &test->ifobj_tx); + return res; } static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) @@ -1556,160 +1537,139 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj exit_with_error(errno); } -static void testapp_bpf_res(struct test_spec *test) +static int testapp_bpf_res(struct test_spec *test) { test_spec_set_name(test, "BPF_RES"); test->total_steps = 2; test->nb_sockets = 2; if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_headroom(struct test_spec *test) +static int testapp_headroom(struct test_spec *test) { test_spec_set_name(test, "UMEM_HEADROOM"); test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_rx_dropped(struct test_spec *test) +static int testapp_stats_rx_dropped(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_DROPPED"); + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); + return TEST_SKIP; + } + pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; pkt_stream_receive_half(test); test->ifobj_rx->validation_func = validate_rx_dropped; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_tx_invalid_descs(struct test_spec *test) +static int testapp_stats_tx_invalid_descs(struct test_spec *test) { test_spec_set_name(test, "STAT_TX_INVALID"); pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); test->ifobj_tx->validation_func = validate_tx_invalid_descs; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_rx_full(struct test_spec *test) +static int testapp_stats_rx_full(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_FULL"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; test->ifobj_rx->validation_func = validate_rx_full; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_fill_empty(struct test_spec *test) +static int testapp_stats_fill_empty(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; - testapp_validate_traffic(test); -} - -/* Simple test */ -static bool hugepages_present(struct ifobject *ifobject) -{ - size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; - void *bufs; - - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0); - if (bufs == MAP_FAILED) - return false; - - mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; - munmap(bufs, mmap_sz); - return true; + return testapp_validate_traffic(test); } -static bool testapp_unaligned(struct test_spec *test) +static int testapp_unaligned(struct test_spec *test) { - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return false; - } - test_spec_set_name(test, "UNALIGNED_MODE"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; - /* Let half of the packets straddle a buffer boundrary */ - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); + /* Let half of the packets straddle a 4K buffer boundary */ + pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2); - return true; + return testapp_validate_traffic(test); } -static void testapp_single_pkt(struct test_spec *test) +static int testapp_single_pkt(struct test_spec *test) { - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_invalid_desc(struct test_spec *test) +static int testapp_invalid_desc(struct test_spec *test) { - u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; struct pkt pkts[] = { /* Zero packet address allowed */ - {0, PKT_SIZE, 0, true}, + {0, MIN_PKT_SIZE, 0, true}, /* Allowed packet */ - {0x1000, PKT_SIZE, 0, true}, + {0, MIN_PKT_SIZE, 0, true}, /* Straddling the start of umem */ - {-2, PKT_SIZE, 0, false}, + {-2, MIN_PKT_SIZE, 0, false}, /* Packet too large */ - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, /* Up to end of umem allowed */ - {umem_size - PKT_SIZE, PKT_SIZE, 0, true}, + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, /* After umem ends */ - {umem_size, PKT_SIZE, 0, false}, + {umem_size, MIN_PKT_SIZE, 0, false}, /* Straddle the end of umem */ - {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a page boundrary */ - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a 2K boundrary */ - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 4K boundary */ + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 2K boundary */ + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, /* Valid packet for synch so that something is received */ - {0x4000, PKT_SIZE, 0, true}}; + {0, MIN_PKT_SIZE, 0, true}}; - if (test->ifobj_tx->umem->unaligned_mode) { - /* Crossing a page boundrary allowed */ + if (umem->unaligned_mode) { + /* Crossing a page boundary allowed */ pkts[7].valid = true; } - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { - /* Crossing a 2K frame size boundrary not allowed */ + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundary not allowed */ pkts[8].valid = false; } if (test->ifobj_tx->shared_umem) { - pkts[4].addr += umem_size; - pkts[5].addr += umem_size; - pkts[6].addr += umem_size; + pkts[4].offset += umem_size; + pkts[5].offset += umem_size; + pkts[6].offset += umem_size; } pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_xdp_drop(struct test_spec *test) +static int testapp_xdp_drop(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1719,10 +1679,10 @@ static void testapp_xdp_drop(struct test_spec *test) skel_rx->maps.xsk, skel_tx->maps.xsk); pkt_stream_receive_half(test); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_xdp_metadata_count(struct test_spec *test) +static int testapp_xdp_metadata_count(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1743,10 +1703,10 @@ static void testapp_xdp_metadata_count(struct test_spec *test) if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) exit_with_error(errno); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_poll_txq_tmout(struct test_spec *test) +static int testapp_poll_txq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_TXQ_FULL"); @@ -1754,14 +1714,14 @@ static void testapp_poll_txq_tmout(struct test_spec *test) /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ test->ifobj_tx->umem->frame_size = 2048; pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); - testapp_validate_traffic_single_thread(test, test->ifobj_tx); + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); } -static void testapp_poll_rxq_tmout(struct test_spec *test) +static int testapp_poll_rxq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_RXQ_EMPTY"); test->ifobj_rx->use_poll = true; - testapp_validate_traffic_single_thread(test, test->ifobj_rx); + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); } static int xsk_load_xdp_programs(struct ifobject *ifobj) @@ -1778,25 +1738,30 @@ static void xsk_unload_xdp_programs(struct ifobject *ifobj) xsk_xdp_progs__destroy(ifobj->xdp_progs); } +/* Simple test */ +static bool hugepages_present(void) +{ + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); + if (bufs == MAP_FAILED) + return false; + + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + munmap(bufs, mmap_sz); + return true; +} + static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - const char *dst_ip, const char *src_ip, const u16 dst_port, - const u16 src_port, thread_func_t func_ptr) + thread_func_t func_ptr) { - struct in_addr ip; int err; memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - inet_aton(dst_ip, &ip); - ifobj->dst_ip = ip.s_addr; - - inet_aton(src_ip, &ip); - ifobj->src_ip = ip.s_addr; - - ifobj->dst_port = dst_port; - ifobj->src_port = src_port; - ifobj->func_ptr = func_ptr; err = xsk_load_xdp_programs(ifobj); @@ -1804,94 +1769,87 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * printf("Error loading XDP program\n"); exit_with_error(err); } + + if (hugepages_present()) + ifobj->unaligned_supp = true; } static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) { + int ret = TEST_SKIP; + switch (type) { case TEST_TYPE_STATS_RX_DROPPED: - if (mode == TEST_MODE_ZC) { - ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); - return; - } - testapp_stats_rx_dropped(test); + ret = testapp_stats_rx_dropped(test); break; case TEST_TYPE_STATS_TX_INVALID_DESCS: - testapp_stats_tx_invalid_descs(test); + ret = testapp_stats_tx_invalid_descs(test); break; case TEST_TYPE_STATS_RX_FULL: - testapp_stats_rx_full(test); + ret = testapp_stats_rx_full(test); break; case TEST_TYPE_STATS_FILL_EMPTY: - testapp_stats_fill_empty(test); + ret = testapp_stats_fill_empty(test); break; case TEST_TYPE_TEARDOWN: - testapp_teardown(test); + ret = testapp_teardown(test); break; case TEST_TYPE_BIDI: - testapp_bidi(test); + ret = testapp_bidi(test); break; case TEST_TYPE_BPF_RES: - testapp_bpf_res(test); + ret = testapp_bpf_res(test); break; case TEST_TYPE_RUN_TO_COMPLETION: test_spec_set_name(test, "RUN_TO_COMPLETION"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); - testapp_single_pkt(test); + ret = testapp_single_pkt(test); break; case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); - testapp_validate_traffic(test); + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_RX_POLL: test->ifobj_rx->use_poll = true; test_spec_set_name(test, "POLL_RX"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_TX_POLL: test->ifobj_tx->use_poll = true; test_spec_set_name(test, "POLL_TX"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_POLL_TXQ_TMOUT: - testapp_poll_txq_tmout(test); + ret = testapp_poll_txq_tmout(test); break; case TEST_TYPE_POLL_RXQ_TMOUT: - testapp_poll_rxq_tmout(test); + ret = testapp_poll_rxq_tmout(test); break; case TEST_TYPE_ALIGNED_INV_DESC: test_spec_set_name(test, "ALIGNED_INV_DESC"); - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC: - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } test_spec_set_name(test, "UNALIGNED_INV_DESC"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { u64 page_size, umem_size; - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); /* Odd frame size so the UMEM doesn't end near a page boundary. */ test->ifobj_tx->umem->frame_size = 4001; @@ -1903,29 +1861,28 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ */ page_size = sysconf(_SC_PAGESIZE); umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; - assert(umem_size % page_size > PKT_SIZE); - assert(umem_size % page_size < page_size - PKT_SIZE); - testapp_invalid_desc(test); + assert(umem_size % page_size > MIN_PKT_SIZE); + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); + ret = testapp_invalid_desc(test); break; } case TEST_TYPE_UNALIGNED: - if (!testapp_unaligned(test)) - return; + ret = testapp_unaligned(test); break; case TEST_TYPE_HEADROOM: - testapp_headroom(test); + ret = testapp_headroom(test); break; case TEST_TYPE_XDP_DROP_HALF: - testapp_xdp_drop(test); + ret = testapp_xdp_drop(test); break; case TEST_TYPE_XDP_METADATA_COUNT: - testapp_xdp_metadata_count(test); + ret = testapp_xdp_metadata_count(test); break; default: break; } - if (!test->fail) + if (ret == TEST_PASS) ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), test->name); pkt_stream_restore_default(test); @@ -2030,14 +1987,12 @@ int main(int argc, char **argv) modes++; } - init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, - worker_testapp_validate_rx); - init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, - worker_testapp_validate_tx); + init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); + init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); test_spec_init(&test, ifobj_tx, ifobj_rx, 0); - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE); + tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); if (!tx_pkt_stream_default || !rx_pkt_stream_default) exit_with_error(ENOMEM); test.tx_pkt_stream_default = tx_pkt_stream_default; diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index c535aeab2ca3..aaf27e067640 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -30,22 +30,14 @@ #define TEST_PASS 0 #define TEST_FAILURE -1 #define TEST_CONTINUE 1 +#define TEST_SKIP 2 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 #define MAX_SOCKETS 2 #define MAX_TEST_NAME_SIZE 32 #define MAX_TEARDOWN_ITER 10 -#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ - sizeof(struct udphdr)) -#define MIN_ETH_PKT_SIZE 64 -#define ETH_FCS_SIZE 4 -#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) -#define PKT_SIZE (MIN_PKT_SIZE) -#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) -#define IP_PKT_VER 0x4 -#define IP_PKT_TOS 0x9 -#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) -#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ +#define MIN_PKT_SIZE 64 #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 @@ -57,6 +49,7 @@ #define UMEM_HEADROOM_TEST_SIZE 128 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) #define HUGEPAGE_SIZE (2 * 1024 * 1024) +#define PKT_DUMP_NB_TO_PRINT 16 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -93,13 +86,13 @@ enum test_type { TEST_TYPE_MAX }; -static bool opt_pkt_dump; static bool opt_verbose; struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; struct xsk_umem *umem; + u64 next_buffer; u32 num_frames; u32 frame_headroom; void *buffer; @@ -118,17 +111,17 @@ struct xsk_socket_info { }; struct pkt { - u64 addr; + int offset; u32 len; - u32 payload; + u32 pkt_nb; bool valid; }; struct pkt_stream { u32 nb_pkts; - u32 rx_pkt_nb; + u32 current_pkt_nb; struct pkt *pkts; - bool use_addr_for_fill; + u32 max_pkt_len; }; struct ifobject; @@ -148,11 +141,7 @@ struct ifobject { struct bpf_program *xdp_prog; enum test_mode mode; int ifindex; - u32 dst_ip; - u32 src_ip; u32 bind_flags; - u16 src_port; - u16 dst_port; bool tx_on; bool rx_on; bool use_poll; @@ -161,6 +150,7 @@ struct ifobject { bool release_rx; bool shared_umem; bool use_metadata; + bool unaligned_supp; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -184,7 +174,6 @@ struct test_spec { pthread_barrier_t barr; pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; int pkts_in_flight; diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh index 0cf9e47e3209..a5c2aec52898 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh @@ -16,10 +16,9 @@ # +----------------|--+ +--|-----------------+ # | | # +----------------|-------------------------|-----------------+ -# | SW | | | +# | SW $swp1 + + $swp2 | +# | | | | # | +--------------|-------------------------|---------------+ | -# | | $swp1 + + $swp2 | | -# | | | | | | # | | $swp1.10 + + $swp2.10 | | # | | | | # | | br0 | | diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh index df2b09966886..7d7f862c809c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh @@ -15,10 +15,9 @@ # +----------------|--+ +--|-----------------+ # | | # +----------------|-------------------------|-----------------+ -# | SW | | | +# | SW $swp1 + + $swp2 | +# | | | | # | +--------------|-------------------------|---------------+ | -# | | $swp1 + + $swp2 | | -# | | | | | | # | | $swp1.10 + + $swp2.10 | | # | | | | # | | br0 | | diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index d6e106fbce11..a1e955d2de4c 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 all: -TEST_PROGS := ftracetest +TEST_PROGS_EXTENDED := ftracetest +TEST_PROGS := ftracetest-ktap TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index c3311c8c4089..2506621e75df 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -13,6 +13,7 @@ echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]" echo " Options:" echo " -h|--help Show help message" echo " -k|--keep Keep passed test logs" +echo " -K|--ktap Output in KTAP format" echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" echo " -vvv Alias of -v -v -v (Show all commands immediately)" @@ -85,6 +86,10 @@ parse_opts() { # opts KEEP_LOG=1 shift 1 ;; + --ktap|-K) + KTAP=1 + shift 1 + ;; --verbose|-v|-vv|-vvv) if [ $VERBOSE -eq -1 ]; then usage "--console can not use with --verbose" @@ -178,6 +183,7 @@ TEST_DIR=$TOP_DIR/test.d TEST_CASES=`find_testcases $TEST_DIR` LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/ KEEP_LOG=0 +KTAP=0 DEBUG=0 VERBOSE=0 UNSUPPORTED_RESULT=0 @@ -229,7 +235,7 @@ prlog() { # messages newline= shift fi - printf "$*$newline" + [ "$KTAP" != "1" ] && printf "$*$newline" [ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE } catlog() { #file @@ -260,11 +266,11 @@ TOTAL_RESULT=0 INSTANCE= CASENO=0 +CASENAME= testcase() { # testfile CASENO=$((CASENO+1)) - desc=`grep "^#[ \t]*description:" $1 | cut -f2- -d:` - prlog -n "[$CASENO]$INSTANCE$desc" + CASENAME=`grep "^#[ \t]*description:" $1 | cut -f2- -d:` } checkreq() { # testfile @@ -277,40 +283,68 @@ test_on_instance() { # testfile grep -q "^#[ \t]*flags:.*instance" $1 } +ktaptest() { # result comment + if [ "$KTAP" != "1" ]; then + return + fi + + local result= + if [ "$1" = "1" ]; then + result="ok" + else + result="not ok" + fi + shift + + local comment=$* + if [ "$comment" != "" ]; then + comment="# $comment" + fi + + echo $CASENO $result $INSTANCE$CASENAME $comment +} + eval_result() { # sigval case $1 in $PASS) prlog " [${color_green}PASS${color_reset}]" + ktaptest 1 PASSED_CASES="$PASSED_CASES $CASENO" return 0 ;; $FAIL) prlog " [${color_red}FAIL${color_reset}]" + ktaptest 0 FAILED_CASES="$FAILED_CASES $CASENO" return 1 # this is a bug. ;; $UNRESOLVED) prlog " [${color_blue}UNRESOLVED${color_reset}]" + ktaptest 0 UNRESOLVED UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO" return $UNRESOLVED_RESULT # depends on use case ;; $UNTESTED) prlog " [${color_blue}UNTESTED${color_reset}]" + ktaptest 1 SKIP UNTESTED_CASES="$UNTESTED_CASES $CASENO" return 0 ;; $UNSUPPORTED) prlog " [${color_blue}UNSUPPORTED${color_reset}]" + ktaptest 1 SKIP UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO" return $UNSUPPORTED_RESULT # depends on use case ;; $XFAIL) prlog " [${color_green}XFAIL${color_reset}]" + ktaptest 1 XFAIL XFAILED_CASES="$XFAILED_CASES $CASENO" return 0 ;; *) prlog " [${color_blue}UNDEFINED${color_reset}]" + ktaptest 0 error UNDEFINED_CASES="$UNDEFINED_CASES $CASENO" return 1 # this must be a test bug ;; @@ -371,6 +405,7 @@ __run_test() { # testfile run_test() { # testfile local testname=`basename $1` testcase $1 + prlog -n "[$CASENO]$INSTANCE$CASENAME" if [ ! -z "$LOG_FILE" ] ; then local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX` else @@ -405,6 +440,17 @@ run_test() { # testfile # load in the helper functions . $TEST_DIR/functions +if [ "$KTAP" = "1" ]; then + echo "TAP version 13" + + casecount=`echo $TEST_CASES | wc -w` + for t in $TEST_CASES; do + test_on_instance $t || continue + casecount=$((casecount+1)) + done + echo "1..${casecount}" +fi + # Main loop for t in $TEST_CASES; do run_test $t @@ -439,6 +485,17 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w` prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w` prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w` +if [ "$KTAP" = "1" ]; then + echo -n "# Totals:" + echo -n " pass:"`echo $PASSED_CASES | wc -w` + echo -n " faii:"`echo $FAILED_CASES | wc -w` + echo -n " xfail:"`echo $XFAILED_CASES | wc -w` + echo -n " xpass:0" + echo -n " skip:"`echo $UNTESTED_CASES $UNSUPPORTED_CASES | wc -w` + echo -n " error:"`echo $UNRESOLVED_CASES $UNDEFINED_CASES | wc -w` + echo +fi + cleanup # if no error, return 0 diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap new file mode 100755 index 000000000000..b3284679ef3a --- /dev/null +++ b/tools/testing/selftests/ftrace/ftracetest-ktap @@ -0,0 +1,8 @@ +#!/bin/sh -e +# SPDX-License-Identifier: GPL-2.0-only +# +# ftracetest-ktap: Wrapper to integrate ftracetest with the kselftest runner +# +# Copyright (C) Arm Ltd., 2023 + +./ftracetest -K diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index e2ff3bf4df80..2de7c61d1ae3 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -9,18 +9,33 @@ fail() { #msg exit_fail } -echo "Test event filter function name" +sample_events() { + echo > trace + echo 1 > events/kmem/kmem_cache_free/enable + echo 1 > tracing_on + ls > /dev/null + echo 0 > tracing_on + echo 0 > events/kmem/kmem_cache_free/enable +} + echo 0 > tracing_on echo 0 > events/enable + +echo "Get the most frequently calling function" +sample_events + +target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` +if [ -z "$target_func" ]; then + exit_fail +fi echo > trace -echo 'call_site.function == exit_mmap' > events/kmem/kmem_cache_free/filter -echo 1 > events/kmem/kmem_cache_free/enable -echo 1 > tracing_on -ls > /dev/null -echo 0 > events/kmem/kmem_cache_free/enable -hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l` -misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l` +echo "Test event filter function name" +echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter +sample_events + +hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` +misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` if [ $hitcnt -eq 0 ]; then exit_fail @@ -30,20 +45,14 @@ if [ $misscnt -gt 0 ]; then exit_fail fi -address=`grep ' exit_mmap$' /proc/kallsyms | cut -d' ' -f1` +address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1` echo "Test event filter function address" -echo 0 > tracing_on -echo 0 > events/enable -echo > trace echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter -echo 1 > events/kmem/kmem_cache_free/enable -echo 1 > tracing_on -sleep 1 -echo 0 > events/kmem/kmem_cache_free/enable +sample_events -hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l` -misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l` +hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` +misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` if [ $hitcnt -eq 0 ]; then exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc new file mode 100644 index 000000000000..d0cd91a93069 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test inter-event histogram trigger trace action with dynamic string param (legacy stack) +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[] stack' >> synthetic_events":README + +fail() { #msg + echo $1 + exit_fail +} + +echo "Test create synthetic event with stack" + +# Test the old stacktrace keyword (for backward compatibility) +echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events +echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger +echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger +echo 1 > events/synthetic/wake_lat/enable +sleep 1 + +if ! grep -q "=>.*sched" trace; then + fail "Failed to create synthetic event with stack" +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc index 755dbe94ccf4..8f1cc9a86a06 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action with dynamic string param -# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[]' >> synthetic_events":README +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "can be any field, or the special string 'common_stacktrace'":README fail() { #msg echo $1 @@ -10,9 +10,8 @@ fail() { #msg echo "Test create synthetic event with stack" - echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events -echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger +echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger echo 1 > events/synthetic/wake_lat/enable sleep 1 diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 9f539d454ee4..fa2ce2b9dd5f 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -389,6 +389,9 @@ create_chip chip create_bank chip bank set_num_lines chip bank 8 enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" remove_chip chip diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7a5ff646e7e7..4761b768b773 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/amx_test TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test +TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c new file mode 100644 index 000000000000..4c416ebe7d66 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test edge cases and race conditions in kvm_recalculate_apic_map(). + */ + +#include <sys/ioctl.h> +#include <pthread.h> +#include <time.h> + +#include "processor.h" +#include "test_util.h" +#include "kvm_util.h" +#include "apic.h" + +#define TIMEOUT 5 /* seconds */ + +#define LAPIC_DISABLED 0 +#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) +#define MAX_XAPIC_ID 0xff + +static void *race(void *arg) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu = arg; + + while (1) { + /* Trigger kvm_recalculate_apic_map(). */ + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + pthread_testcancel(); + } + + return NULL; +} + +int main(void) +{ + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpuN; + struct kvm_vm *vm; + pthread_t thread; + time_t t; + int i; + + kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID); + + /* + * Create the max number of vCPUs supported by selftests so that KVM + * has decent amount of work to do when recalculating the map, i.e. to + * make the problematic window large enough to hit. + */ + vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus); + + /* + * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc + * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits). + */ + for (i = 0; i < KVM_MAX_VCPUS; i++) + vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); + + ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); + + vcpuN = vcpus[KVM_MAX_VCPUS - 1]; + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC); + vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); + } + + ASSERT_EQ(pthread_cancel(thread), 0); + ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 80f06aa62034..f27a7338b60e 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -8,8 +8,10 @@ diag_uid fin_ack_lat gro hwtstamp_config +io_uring_zerocopy_tx ioam6_parser ip_defrag +ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr @@ -26,6 +28,7 @@ reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack rxtimestamp +sctp_hello sk_bind_sendto_listen sk_connect_zero_addr socket diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index a47b26ab48f2..0f5e88c8f4ff 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -2283,7 +2283,7 @@ EOF ################################################################################ # main -while getopts :t:pP46hv:w: o +while getopts :t:pP46hvw: o do case $o in t) TESTS=$OPTARG;; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 7da8ec838c63..35d89dfa6f11 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -68,7 +68,7 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del ns1 + ip netns del ns1 &> /dev/null ip netns del ns2 &> /dev/null } diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index a474c60fe348..9d0062b542e5 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -83,6 +83,7 @@ TEST_PROGS = bridge_igmp.sh \ tc_chains.sh \ tc_flower_router.sh \ tc_flower.sh \ + tc_flower_l2_miss.sh \ tc_mpls_l2vpn.sh \ tc_police.sh \ tc_shblocks.sh \ diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh index fa6a88c50750..de2b2d5480dd 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh @@ -1,6 +1,28 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.555 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | | | | | | +# | + $h1 | | | | +# +----|-------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|-------------------------------+ + $swp2 | +# | | + $swp1 | 192.0.2.129/28 | +# | | vid 555 | 2001:db8:2::1/64 | +# | | | | +# | | + BR1 (802.1q) | | +# | | vid 555 pvid untagged | | +# | | 192.0.2.2/28 | | +# | | 2001:db8:1::2/64 | | +# | +----------------------------------+ | +# +---------------------------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 @@ -41,7 +63,7 @@ h2_destroy() router_create() { - ip link add name br1 type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 ip link set dev br1 up ip link set dev $swp1 master br1 diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh new file mode 100755 index 000000000000..37b0369b5246 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh @@ -0,0 +1,350 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | | +# +----|------------------+ +------------------|---+ +# | | +# +----|-------------------------------------------------------------------|---+ +# | SW | | | +# | +-|-------------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +-----------------------------------------------------------------------+ | +# +----------------------------------------------------------------------------+ + +ALL_TESTS=" + test_l2_miss_unicast + test_l2_miss_multicast + test_l2_miss_ll_multicast + test_l2_miss_broadcast +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +test_l2_miss_unicast() +{ + local dmac=00:01:02:03:04:05 + local dip=192.0.2.2 + local sip=192.0.2.1 + + RET=0 + + # Unknown unicast. + tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \ + flower indev $swp1 l2_miss true dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + # Known unicast. + tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \ + flower indev $swp1 l2_miss false dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + + # Before adding FDB entry. + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unknown unicast filter was not hit before adding FDB entry" + + tc_check_packets "dev $swp2 egress" 102 0 + check_err $? "Known unicast filter was hit before adding FDB entry" + + # Adding FDB entry. + bridge fdb replace $dmac dev $swp2 master static + + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unknown unicast filter was hit after adding FDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Known unicast filter was not hit after adding FDB entry" + + # Deleting FDB entry. + bridge fdb del $dmac dev $swp2 master static + + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Unknown unicast filter was not hit after deleting FDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Known unicast filter was hit after deleting FDB entry" + + tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower + + log_test "L2 miss - Unicast" +} + +test_l2_miss_multicast_common() +{ + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local mode=$1; shift + local name=$1; shift + + RET=0 + + # Unregistered multicast. + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower indev $swp1 l2_miss true src_ip $sip dst_ip $dip \ + action pass + # Registered multicast. + tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \ + flower indev $swp1 l2_miss false src_ip $sip dst_ip $dip \ + action pass + + # Before adding MDB entry. + $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unregistered multicast filter was not hit before adding MDB entry" + + tc_check_packets "dev $swp2 egress" 102 0 + check_err $? "Registered multicast filter was hit before adding MDB entry" + + # Adding MDB entry. + bridge mdb replace dev br1 port $swp2 grp $dip permanent + + $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unregistered multicast filter was hit after adding MDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Registered multicast filter was not hit after adding MDB entry" + + # Deleting MDB entry. + bridge mdb del dev br1 port $swp2 grp $dip + + $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Unregistered multicast filter was not hit after deleting MDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Registered multicast filter was hit after deleting MDB entry" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + + log_test "L2 miss - Multicast ($name)" +} + +test_l2_miss_multicast_ipv4() +{ + local proto="ipv4" + local sip=192.0.2.1 + local dip=239.1.1.1 + local mode="-4" + local name="IPv4" + + test_l2_miss_multicast_common $proto $sip $dip $mode $name +} + +test_l2_miss_multicast_ipv6() +{ + local proto="ipv6" + local sip=2001:db8:1::1 + local dip=ff0e::1 + local mode="-6" + local name="IPv6" + + test_l2_miss_multicast_common $proto $sip $dip $mode $name +} + +test_l2_miss_multicast() +{ + # Configure $swp2 as a multicast router port so that it will forward + # both registered and unregistered multicast traffic. + bridge link set dev $swp2 mcast_router 2 + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. + ip link set dev br1 type bridge mcast_querier 1 + ip -6 address add fe80::1/64 nodad dev br1 + # Wait the default Query Response Interval (10 seconds) for the bridge + # to determine that there are no other queriers in the network. + sleep 10 + + test_l2_miss_multicast_ipv4 + test_l2_miss_multicast_ipv6 + + ip -6 address del fe80::1/64 dev br1 + ip link set dev br1 type bridge mcast_querier 0 + bridge link set dev $swp2 mcast_router 1 +} + +test_l2_miss_multicast_common2() +{ + local name=$1; shift + local dmac=$1; shift + local dip=224.0.0.1 + local sip=192.0.2.1 + +} + +test_l2_miss_ll_multicast_common() +{ + local proto=$1; shift + local dmac=$1; shift + local sip=$1; shift + local dip=$1; shift + local mode=$1; shift + local name=$1; shift + + RET=0 + + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower indev $swp1 l2_miss true dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Filter was not hit" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + + log_test "L2 miss - Link-local multicast ($name)" +} + +test_l2_miss_ll_multicast_ipv4() +{ + local proto=ipv4 + local dmac=01:00:5e:00:00:01 + local sip=192.0.2.1 + local dip=224.0.0.1 + local mode="-4" + local name="IPv4" + + test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name +} + +test_l2_miss_ll_multicast_ipv6() +{ + local proto=ipv6 + local dmac=33:33:00:00:00:01 + local sip=2001:db8:1::1 + local dip=ff02::1 + local mode="-6" + local name="IPv6" + + test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name +} + +test_l2_miss_ll_multicast() +{ + test_l2_miss_ll_multicast_ipv4 + test_l2_miss_ll_multicast_ipv6 +} + +test_l2_miss_broadcast() +{ + local dmac=ff:ff:ff:ff:ff:ff + local smac=00:01:02:03:04:05 + + RET=0 + + tc filter add dev $swp2 egress protocol all handle 101 pref 1 \ + flower l2_miss true dst_mac $dmac src_mac $smac \ + action pass + tc filter add dev $swp2 egress protocol all handle 102 pref 1 \ + flower l2_miss false dst_mac $dmac src_mac $smac \ + action pass + + $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? "L2 miss filter was hit when should not" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "L2 no miss filter was not hit when should" + + tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower + + log_test "L2 miss - Broadcast" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 43a723626126..7b936a926859 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -9,7 +9,7 @@ TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq -TEST_FILES := settings +TEST_FILES := mptcp_lib.sh settings EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index ef628b16fe9b..4eacdb1ab962 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns="ns1-$rndh" @@ -31,6 +33,8 @@ cleanup() ip netns del $ns } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index a43d3e2f59bb..c1f7bac19942 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + time_start=$(date +%s) optstring="S:R:d:e:l:r:h4cm:f:tC" @@ -141,6 +143,8 @@ cleanup() done } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 26310c17b4c6..85474e029784 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -10,6 +10,8 @@ # because it's invoked by variable name, see how the "tests" array is used #shellcheck disable=SC2317 +. "$(dirname "${0}")/mptcp_lib.sh" + ret=0 sin="" sinfail="" @@ -17,6 +19,7 @@ sout="" cin="" cinfail="" cinsent="" +tmpfile="" cout="" capout="" ns1="" @@ -34,6 +37,7 @@ evts_ns1="" evts_ns2="" evts_ns1_pid=0 evts_ns2_pid=0 +stats_dumped=0 declare -A all_tests declare -a only_tests_ids @@ -87,6 +91,7 @@ init_partial() fi done + stats_dumped=0 check_invert=0 validate_checksum=$checksum FAILING_LINKS="" @@ -136,6 +141,8 @@ cleanup_partial() check_tools() { + mptcp_lib_check_mptcp + if ! ip -Version &> /dev/null; then echo "SKIP: Could not run test without ip tool" exit $ksft_skip @@ -175,6 +182,7 @@ cleanup() { rm -f "$cin" "$cout" "$sinfail" rm -f "$sin" "$sout" "$cinsent" "$cinfail" + rm -f "$tmpfile" rm -rf $evts_ns1 $evts_ns2 cleanup_partial } @@ -347,6 +355,9 @@ fail_test() { ret=1 failed_tests[${TEST_COUNT}]="${TEST_NAME}" + + [ "${stats_dumped}" = 0 ] && dump_stats + stats_dumped=1 } get_failed_tests_ids() @@ -383,9 +394,16 @@ check_transfer() fail_test return 1 fi - bytes="--bytes=${bytes}" + + # note: BusyBox's "cmp" command doesn't support --bytes + tmpfile=$(mktemp) + head --bytes="$bytes" "$in" > "$tmpfile" + mv "$tmpfile" "$in" + head --bytes="$bytes" "$out" > "$tmpfile" + mv "$tmpfile" "$out" + tmpfile="" fi - cmp -l "$in" "$out" ${bytes} | while read -r i a b; do + cmp -l "$in" "$out" | while read -r i a b; do local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then echo "[ FAIL ] $what does not match (in, out):" @@ -849,7 +867,15 @@ do_transfer() sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id sleep 1 + sp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + da=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + dp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id + ip netns exec ${listener_ns} ./pm_nl_ctl dsf lip "::ffff:$addr" \ + lport $sp rip $da rport $dp token $tk fi counter=$((counter + 1)) @@ -915,6 +941,7 @@ do_transfer() sleep 1 sp=$(grep "type:10" "$evts_ns2" | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec ${connector_ns} ./pm_nl_ctl rem token $tk id $id ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ rip $da rport $dp token $tk fi @@ -1120,7 +1147,6 @@ chk_csum_nr() local csum_ns1=${1:-0} local csum_ns2=${2:-0} local count - local dump_stats local extra_msg="" local allow_multi_errors_ns1=0 local allow_multi_errors_ns2=0 @@ -1144,7 +1170,6 @@ chk_csum_nr() { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns1" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1158,11 +1183,9 @@ chk_csum_nr() { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then echo "[fail] got $count data checksum error[s] expected $csum_ns2" fail_test - dump_stats=1 else echo -n "[ ok ]" fi - [ "${dump_stats}" = 1 ] && dump_stats echo "$extra_msg" } @@ -1173,7 +1196,6 @@ chk_fail_nr() local fail_rx=$2 local ns_invert=${3:-""} local count - local dump_stats local ns_tx=$ns1 local ns_rx=$ns2 local extra_msg="" @@ -1205,7 +1227,6 @@ chk_fail_nr() { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1220,13 +1241,10 @@ chk_fail_nr() { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi - [ "${dump_stats}" = 1 ] && dump_stats - echo "$extra_msg" } @@ -1236,7 +1254,6 @@ chk_fclose_nr() local fclose_rx=$2 local ns_invert=$3 local count - local dump_stats local ns_tx=$ns2 local ns_rx=$ns1 local extra_msg=" " @@ -1254,7 +1271,6 @@ chk_fclose_nr() if [ "$count" != "$fclose_tx" ]; then echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1266,13 +1282,10 @@ chk_fclose_nr() if [ "$count" != "$fclose_rx" ]; then echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi - [ "${dump_stats}" = 1 ] && dump_stats - echo "$extra_msg" } @@ -1282,7 +1295,6 @@ chk_rst_nr() local rst_rx=$2 local ns_invert=${3:-""} local count - local dump_stats local ns_tx=$ns1 local ns_rx=$ns2 local extra_msg="" @@ -1299,7 +1311,6 @@ chk_rst_nr() if [ $count -lt $rst_tx ]; then echo "[fail] got $count MP_RST[s] TX expected $rst_tx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1310,13 +1321,10 @@ chk_rst_nr() if [ "$count" -lt "$rst_rx" ]; then echo "[fail] got $count MP_RST[s] RX expected $rst_rx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi - [ "${dump_stats}" = 1 ] && dump_stats - echo "$extra_msg" } @@ -1325,7 +1333,6 @@ chk_infi_nr() local infi_tx=$1 local infi_rx=$2 local count - local dump_stats printf "%-${nr_blank}s %s" " " "itx" count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}') @@ -1333,7 +1340,6 @@ chk_infi_nr() if [ "$count" != "$infi_tx" ]; then echo "[fail] got $count infinite map[s] TX expected $infi_tx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1344,12 +1350,9 @@ chk_infi_nr() if [ "$count" != "$infi_rx" ]; then echo "[fail] got $count infinite map[s] RX expected $infi_rx" fail_test - dump_stats=1 else echo "[ ok ]" fi - - [ "${dump_stats}" = 1 ] && dump_stats } chk_join_nr() @@ -1364,7 +1367,6 @@ chk_join_nr() local infi_nr=${8:-0} local corrupted_pkts=${9:-0} local count - local dump_stats local with_cookie local title="${TEST_NAME}" @@ -1378,7 +1380,6 @@ chk_join_nr() if [ "$count" != "$syn_nr" ]; then echo "[fail] got $count JOIN[s] syn expected $syn_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1396,7 +1397,6 @@ chk_join_nr() else echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" fail_test - dump_stats=1 fi else echo -n "[ ok ]" @@ -1408,11 +1408,9 @@ chk_join_nr() if [ "$count" != "$ack_nr" ]; then echo "[fail] got $count JOIN[s] ack expected $ack_nr" fail_test - dump_stats=1 else echo "[ ok ]" fi - [ "${dump_stats}" = 1 ] && dump_stats if [ $validate_checksum -eq 1 ]; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1472,7 +1470,6 @@ chk_add_nr() local mis_syn_nr=${7:-0} local mis_ack_nr=${8:-0} local count - local dump_stats local timeout timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) @@ -1486,18 +1483,16 @@ chk_add_nr() if [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then echo "[fail] got $count ADD_ADDR[s] expected $add_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi echo -n " - echo " - count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}') + count=$(ip netns exec $ns1 nstat -as MPTcpExtEchoAdd | grep MPTcpExtEchoAdd | awk '{print $2}') [ -z "$count" ] && count=0 if [ "$count" != "$echo_nr" ]; then echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1509,7 +1504,6 @@ chk_add_nr() if [ "$count" != "$port_nr" ]; then echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr" fail_test - dump_stats=1 else echo "[ ok ]" fi @@ -1522,7 +1516,6 @@ chk_add_nr() echo "[fail] got $count JOIN[s] syn with a different \ port-number expected $syn_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1535,7 +1528,6 @@ chk_add_nr() echo "[fail] got $count JOIN[s] synack with a different \ port-number expected $syn_ack_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1548,7 +1540,6 @@ chk_add_nr() echo "[fail] got $count JOIN[s] ack with a different \ port-number expected $ack_nr" fail_test - dump_stats=1 else echo "[ ok ]" fi @@ -1561,7 +1552,6 @@ chk_add_nr() echo "[fail] got $count JOIN[s] syn with a mismatched \ port-number expected $mis_syn_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1574,15 +1564,45 @@ chk_add_nr() echo "[fail] got $count JOIN[s] ack with a mismatched \ port-number expected $mis_ack_nr" fail_test - dump_stats=1 else echo "[ ok ]" fi else echo "" fi +} + +chk_add_tx_nr() +{ + local add_tx_nr=$1 + local echo_tx_nr=$2 + local timeout + local count + + timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + + printf "%-${nr_blank}s %s" " " "add TX" + count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}') + [ -z "$count" ] && count=0 + + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr" + fail_test + else + echo -n "[ ok ]" + fi - [ "${dump_stats}" = 1 ] && dump_stats + echo -n " - echo TX " + count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ "$count" != "$echo_tx_nr" ]; then + echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr" + fail_test + else + echo "[ ok ]" + fi } chk_rm_nr() @@ -1592,7 +1612,6 @@ chk_rm_nr() local invert local simult local count - local dump_stats local addr_ns=$ns1 local subflow_ns=$ns2 local extra_msg="" @@ -1614,12 +1633,11 @@ chk_rm_nr() fi printf "%-${nr_blank}s %s" " " "rm " - count=$(ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}') + count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}') [ -z "$count" ] && count=0 if [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1643,19 +1661,32 @@ chk_rm_nr() else echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" fail_test - dump_stats=1 fi return fi if [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" fail_test - dump_stats=1 else echo -n "[ ok ]" fi - [ "${dump_stats}" = 1 ] && dump_stats + echo "$extra_msg" +} + +chk_rm_tx_nr() +{ + local rm_addr_tx_nr=$1 + + printf "%-${nr_blank}s %s" " " "rm TX " + count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_addr_tx_nr" ]; then + echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr" + fail_test + else + echo -n "[ ok ]" + fi echo "$extra_msg" } @@ -1665,7 +1696,6 @@ chk_prio_nr() local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 local count - local dump_stats printf "%-${nr_blank}s %s" " " "ptx" count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') @@ -1673,7 +1703,6 @@ chk_prio_nr() if [ "$count" != "$mp_prio_nr_tx" ]; then echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" fail_test - dump_stats=1 else echo -n "[ ok ]" fi @@ -1684,12 +1713,9 @@ chk_prio_nr() if [ "$count" != "$mp_prio_nr_rx" ]; then echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" fail_test - dump_stats=1 else echo "[ ok ]" fi - - [ "${dump_stats}" = 1 ] && dump_stats } chk_subflow_nr() @@ -1721,7 +1747,6 @@ chk_subflow_nr() ss -N $ns1 -tOni ss -N $ns1 -tOni | grep token ip -n $ns1 mptcp endpoint - dump_stats fi } @@ -1761,7 +1786,6 @@ chk_mptcp_info() if [ "$dump_stats" = 1 ]; then ss -N $ns1 -inmHM ss -N $ns2 -inmHM - dump_stats fi } @@ -1939,6 +1963,7 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 + chk_add_tx_nr 1 1 chk_add_nr 1 1 fi @@ -2120,6 +2145,7 @@ add_addr_timeout_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow chk_join_nr 1 1 1 + chk_add_tx_nr 4 4 chk_add_nr 4 0 fi @@ -2165,6 +2191,7 @@ remove_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow chk_join_nr 1 1 1 + chk_rm_tx_nr 1 chk_rm_nr 1 1 fi @@ -2263,6 +2290,7 @@ remove_tests() pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr 3 3 3 + chk_rm_tx_nr 0 chk_rm_nr 0 3 simult fi @@ -3129,7 +3157,7 @@ userspace_tests() pm_nl_set_limits $ns1 0 1 run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow chk_join_nr 1 1 1 - chk_rm_nr 0 1 + chk_rm_nr 1 1 kill_events_pids fi } diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh new file mode 100644 index 000000000000..3286536b79d5 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -0,0 +1,40 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly KSFT_FAIL=1 +readonly KSFT_SKIP=4 + +# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all +# features using the last version of the kernel and the selftests to make sure +# a test is not being skipped by mistake. +mptcp_lib_expect_all_features() { + [ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ] +} + +# $1: msg +mptcp_lib_fail_if_expected_feature() { + if mptcp_lib_expect_all_features; then + echo "ERROR: missing feature: ${*}" + exit ${KSFT_FAIL} + fi + + return 1 +} + +# $1: file +mptcp_lib_has_file() { + local f="${1}" + + if [ -f "${f}" ]; then + return 0 + fi + + mptcp_lib_fail_if_expected_feature "${f} file not found" +} + +mptcp_lib_check_mptcp() { + if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then + echo "SKIP: MPTCP support is not available" + exit ${KSFT_SKIP} + fi +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 1b70c0a304ce..ff5adbb9c7f2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + ret=0 sin="" sout="" @@ -84,6 +86,8 @@ cleanup() rm -f "$sin" "$sout" } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 89839d1ff9d8..32f7533e0919 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + ksft_skip=4 ret=0 @@ -34,6 +36,8 @@ cleanup() ip netns del $ns1 } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 9f22f7e5027d..36a3c9d92e20 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -1,6 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) ns1="ns1-$rndh" @@ -34,6 +36,8 @@ cleanup() done } +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index b1eb7bce599d..8092399d911f 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -1,6 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/mptcp_lib.sh" + +mptcp_lib_check_mptcp + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Cannot not run test without ip tool" diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index 1003119773e5..f96282362811 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -232,10 +232,14 @@ setup_rt_networking() local nsname=rt-${rt} ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip link set veth-rt-${rt} netns ${nsname} ip -netns ${nsname} link set veth-rt-${rt} name veth0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -254,6 +258,12 @@ setup_hs() # set the networking for the host ip netns add ${hsname} + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 @@ -272,11 +282,6 @@ setup_hs() ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0 - ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e699548d4247..eccea9845c65 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -15,6 +15,7 @@ #include <linux/tcp.h> #include <linux/socket.h> +#include <sys/epoll.h> #include <sys/types.h> #include <sys/sendfile.h> #include <sys/socket.h> @@ -1637,6 +1638,136 @@ TEST_F(tls_err, timeo) } } +TEST_F(tls_err, poll_partial_rec) +{ + struct pollfd pfd = { }; + ssize_t rec_len; + char rec[256]; + char buf[128]; + + if (self->notls) + SKIP(return, "no TLS support"); + + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + /* Write 100B, not the full record ... */ + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + /* ... no full record should mean no POLLIN */ + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 0); + /* Now write the rest, and it should all pop out of the other end. */ + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100); + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 1); + EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf)); + EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0); +} + +TEST_F(tls_err, epoll_partial_rec) +{ + struct epoll_event ev, events[10]; + ssize_t rec_len; + char rec[256]; + char buf[128]; + int epollfd; + + if (self->notls) + SKIP(return, "no TLS support"); + + epollfd = epoll_create1(0); + ASSERT_GE(epollfd, 0); + + memset(&ev, 0, sizeof(ev)); + ev.events = EPOLLIN; + ev.data.fd = self->cfd2; + ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0); + + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + /* Write 100B, not the full record ... */ + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + /* ... no full record should mean no POLLIN */ + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0); + /* Now write the rest, and it should all pop out of the other end. */ + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100); + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1); + EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf)); + EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0); + + close(epollfd); +} + +TEST_F(tls_err, poll_partial_rec_async) +{ + struct pollfd pfd = { }; + ssize_t rec_len; + char rec[256]; + char buf[128]; + char token; + int p[2]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ASSERT_GE(pipe(p), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int status, pid2; + + close(p[1]); + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + + EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */ + + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), + rec_len - 100); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + close(p[0]); + + /* Child should sleep in poll(), never get a wake */ + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5), 0); + + EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */ + + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5), 1); + + exit(!_metadata->passed); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 75af864e07b6..50aab6b57da3 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -17,6 +17,7 @@ ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ -fno-stack-protector -mrdrnd $(INCLUDES) TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx +TEST_FILES := $(OUTPUT)/test_encl.elf ifeq ($(CAN_BUILD_X86_64), 1) all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json index 44fbfc6caec7..e3d2de5c184f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -155,5 +155,28 @@ "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" ] - } + }, + { + "id": "0531", + "name": "Replace mq with invalid parent ID", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 16\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle ffff: mq" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH parent ffff:fff1 handle ffff: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent ffff", + "matchCount": "16", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + } ] |