diff options
Diffstat (limited to 'tools')
614 files changed, 28771 insertions, 4779 deletions
diff --git a/tools/Makefile b/tools/Makefile index 7e42f7b8bfa7..bd778812e915 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -28,6 +28,7 @@ help: @echo ' pci - PCI tools' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' + @echo ' bootconfig - boot config tool' @echo ' spi - spi tools' @echo ' tmon - thermal monitoring and tuning tool' @echo ' turbostat - Intel CPU idle stats and freq reporting tool' @@ -63,7 +64,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE +cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging: FORCE $(call descend,$@) liblockdep: FORCE @@ -96,7 +97,7 @@ kvm_stat: FORCE $(call descend,kvm/$@) all: acpi cgroup cpupower gpio hv firewire liblockdep \ - perf selftests spi turbostat usb \ + perf selftests bootconfig spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat wmi \ pci debugging @@ -107,7 +108,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -141,7 +142,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: +cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -176,7 +177,7 @@ build_clean: $(call descend,build,clean) clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ - perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ + perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \ diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 820e5751ada7..ba85bb23f060 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -220,10 +220,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2) #define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1) -/* EL0 Virtual Timer Registers */ +/* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ #define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) -#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) /* KVM-as-firmware specific pseudo-registers */ #define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 4703d218663a..f83a70e07df8 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -19,5 +19,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYS_CLONE3 #include <asm-generic/unistd.h> diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e9b62498fe75..f3327cb56edf 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -220,6 +220,7 @@ #define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ #define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ #define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -357,6 +358,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 8e1d0bb46361..4ea8584682f9 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -10,12 +10,6 @@ * cpu_feature_enabled(). */ -#ifdef CONFIG_X86_INTEL_MPX -# define DISABLE_MPX 0 -#else -# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31)) -#endif - #ifdef CONFIG_X86_SMAP # define DISABLE_SMAP 0 #else @@ -74,7 +68,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_MPX|DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 084e98da04a7..d5e517d1c3dd 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -512,6 +512,8 @@ #define MSR_K7_HWCR 0xc0010015 #define MSR_K7_HWCR_SMMLOCK_BIT 0 #define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_HWCR_IRPERF_EN_BIT 30 +#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 @@ -558,7 +560,14 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 @@ -566,11 +575,6 @@ #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 503d3f42da16..3f3f780c8c65 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -390,6 +390,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 +#define KVM_STATE_NESTED_MTF_PENDING 0x00000008 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 3eb8411ab60e..e95b72ec19bc 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -33,7 +33,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 -#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 @@ -94,7 +94,7 @@ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_INTERRUPT_WINDOW, "INTERRUPT_WINDOW" }, \ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 8908c58bd6cd..53adc1762ec0 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -929,7 +929,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/tools/bootconfig/.gitignore b/tools/bootconfig/.gitignore new file mode 100644 index 000000000000..e7644dfaa4a7 --- /dev/null +++ b/tools/bootconfig/.gitignore @@ -0,0 +1 @@ +bootconfig diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile new file mode 100644 index 000000000000..a6146ac64458 --- /dev/null +++ b/tools/bootconfig/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for bootconfig command + +bindir ?= /usr/bin + +HEADER = include/linux/bootconfig.h +CFLAGS = -Wall -g -I./include + +PROGS = bootconfig + +all: $(PROGS) + +bootconfig: ../../lib/bootconfig.c main.c $(HEADER) + $(CC) $(filter %.c,$^) $(CFLAGS) -o $@ + +install: $(PROGS) + install bootconfig $(DESTDIR)$(bindir) + +test: bootconfig + ./test-bootconfig.sh + +clean: + $(RM) -f *.o bootconfig diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h new file mode 100644 index 000000000000..078cbd2ba651 --- /dev/null +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BOOTCONFIG_LINUX_BOOTCONFIG_H +#define _BOOTCONFIG_LINUX_BOOTCONFIG_H + +#include "../../../../include/linux/bootconfig.h" + +#endif diff --git a/tools/bootconfig/include/linux/bug.h b/tools/bootconfig/include/linux/bug.h new file mode 100644 index 000000000000..7b65a389c0dd --- /dev/null +++ b/tools/bootconfig/include/linux/bug.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_BUG_H +#define _SKC_LINUX_BUG_H + +#include <stdio.h> +#include <stdlib.h> + +#define WARN_ON(cond) \ + ((cond) ? printf("Internal warning(%s:%d, %s): %s\n", \ + __FILE__, __LINE__, __func__, #cond) : 0) + +#endif diff --git a/tools/bootconfig/include/linux/ctype.h b/tools/bootconfig/include/linux/ctype.h new file mode 100644 index 000000000000..c56ecc136448 --- /dev/null +++ b/tools/bootconfig/include/linux/ctype.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_CTYPE_H +#define _SKC_LINUX_CTYPE_H + +#include <ctype.h> + +#endif diff --git a/tools/bootconfig/include/linux/errno.h b/tools/bootconfig/include/linux/errno.h new file mode 100644 index 000000000000..5d9f91ec2fda --- /dev/null +++ b/tools/bootconfig/include/linux/errno.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_ERRNO_H +#define _SKC_LINUX_ERRNO_H + +#include <asm/errno.h> + +#endif diff --git a/tools/bootconfig/include/linux/kernel.h b/tools/bootconfig/include/linux/kernel.h new file mode 100644 index 000000000000..2d93320aa374 --- /dev/null +++ b/tools/bootconfig/include/linux/kernel.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_KERNEL_H +#define _SKC_LINUX_KERNEL_H + +#include <stdlib.h> +#include <stdbool.h> + +#include <linux/printk.h> + +typedef unsigned short u16; +typedef unsigned int u32; + +#define unlikely(cond) (cond) + +#define __init +#define __initdata + +#endif diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h new file mode 100644 index 000000000000..7862f217d85d --- /dev/null +++ b/tools/bootconfig/include/linux/memblock.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XBC_LINUX_MEMBLOCK_H +#define _XBC_LINUX_MEMBLOCK_H + +#include <stdlib.h> + +#define __pa(addr) (addr) +#define SMP_CACHE_BYTES 0 +#define memblock_alloc(size, align) malloc(size) +#define memblock_free(paddr, size) free(paddr) + +#endif diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h new file mode 100644 index 000000000000..036e667596eb --- /dev/null +++ b/tools/bootconfig/include/linux/printk.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_PRINTK_H +#define _SKC_LINUX_PRINTK_H + +#include <stdio.h> + +#define printk(fmt, ...) printf(fmt, ##__VA_ARGS__) + +#define pr_err printk +#define pr_warn printk +#define pr_info printk +#define pr_debug printk + +#endif diff --git a/tools/bootconfig/include/linux/string.h b/tools/bootconfig/include/linux/string.h new file mode 100644 index 000000000000..8267af75153a --- /dev/null +++ b/tools/bootconfig/include/linux/string.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SKC_LINUX_STRING_H +#define _SKC_LINUX_STRING_H + +#include <string.h> + +/* Copied from lib/string.c */ +static inline char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +static inline char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} + +#endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c new file mode 100644 index 000000000000..a9b97814d1a9 --- /dev/null +++ b/tools/bootconfig/main.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Boot config tool for initrd image + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> + +#include <linux/kernel.h> +#include <linux/bootconfig.h> + +static int xbc_show_array(struct xbc_node *node) +{ + const char *val; + int i = 0; + + xbc_array_for_each_value(node, val) { + printf("\"%s\"%s", val, node->next ? ", " : ";\n"); + i++; + } + return i; +} + +static void xbc_show_compact_tree(void) +{ + struct xbc_node *node, *cnode; + int depth = 0, i; + + node = xbc_root_node(); + while (node && xbc_node_is_key(node)) { + for (i = 0; i < depth; i++) + printf("\t"); + cnode = xbc_node_get_child(node); + while (cnode && xbc_node_is_key(cnode) && !cnode->next) { + printf("%s.", xbc_node_get_data(node)); + node = cnode; + cnode = xbc_node_get_child(node); + } + if (cnode && xbc_node_is_key(cnode)) { + printf("%s {\n", xbc_node_get_data(node)); + depth++; + node = cnode; + continue; + } else if (cnode && xbc_node_is_value(cnode)) { + printf("%s = ", xbc_node_get_data(node)); + if (cnode->next) + xbc_show_array(cnode); + else + printf("\"%s\";\n", xbc_node_get_data(cnode)); + } else { + printf("%s;\n", xbc_node_get_data(node)); + } + + if (node->next) { + node = xbc_node_get_next(node); + continue; + } + while (!node->next) { + node = xbc_node_get_parent(node); + if (!node) + return; + if (!xbc_node_get_child(node)->next) + continue; + depth--; + for (i = 0; i < depth; i++) + printf("\t"); + printf("}\n"); + } + node = xbc_node_get_next(node); + } +} + +/* Simple real checksum */ +int checksum(unsigned char *buf, int len) +{ + int i, sum = 0; + + for (i = 0; i < len; i++) + sum += buf[i]; + + return sum; +} + +#define PAGE_SIZE 4096 + +int load_xbc_fd(int fd, char **buf, int size) +{ + int ret; + + *buf = malloc(size + 1); + if (!*buf) + return -ENOMEM; + + ret = read(fd, *buf, size); + if (ret < 0) + return -errno; + (*buf)[size] = '\0'; + + return ret; +} + +/* Return the read size or -errno */ +int load_xbc_file(const char *path, char **buf) +{ + struct stat stat; + int fd, ret; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + ret = fstat(fd, &stat); + if (ret < 0) + return -errno; + + ret = load_xbc_fd(fd, buf, stat.st_size); + + close(fd); + + return ret; +} + +int load_xbc_from_initrd(int fd, char **buf) +{ + struct stat stat; + int ret; + u32 size = 0, csum = 0, rcsum; + char magic[BOOTCONFIG_MAGIC_LEN]; + + ret = fstat(fd, &stat); + if (ret < 0) + return -errno; + + if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) + return 0; + + if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) { + pr_err("Failed to lseek: %d\n", -errno); + return -errno; + } + if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0) + return -errno; + /* Check the bootconfig magic bytes */ + if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) + return 0; + + if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) { + pr_err("Failed to lseek: %d\n", -errno); + return -errno; + } + + if (read(fd, &size, sizeof(u32)) < 0) + return -errno; + + if (read(fd, &csum, sizeof(u32)) < 0) + return -errno; + + /* Wrong size error */ + if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { + pr_err("bootconfig size is too big\n"); + return -E2BIG; + } + + if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), + SEEK_SET) < 0) { + pr_err("Failed to lseek: %d\n", -errno); + return -errno; + } + + ret = load_xbc_fd(fd, buf, size); + if (ret < 0) + return ret; + + /* Wrong Checksum */ + rcsum = checksum((unsigned char *)*buf, size); + if (csum != rcsum) { + pr_err("checksum error: %d != %d\n", csum, rcsum); + return -EINVAL; + } + + ret = xbc_init(*buf); + /* Wrong data */ + if (ret < 0) + return ret; + + return size; +} + +int show_xbc(const char *path) +{ + int ret, fd; + char *buf = NULL; + + fd = open(path, O_RDONLY); + if (fd < 0) { + pr_err("Failed to open initrd %s: %d\n", path, fd); + return -errno; + } + + ret = load_xbc_from_initrd(fd, &buf); + if (ret < 0) + pr_err("Failed to load a boot config from initrd: %d\n", ret); + else + xbc_show_compact_tree(); + + close(fd); + free(buf); + + return ret; +} + +int delete_xbc(const char *path) +{ + struct stat stat; + int ret = 0, fd, size; + char *buf = NULL; + + fd = open(path, O_RDWR); + if (fd < 0) { + pr_err("Failed to open initrd %s: %d\n", path, fd); + return -errno; + } + + size = load_xbc_from_initrd(fd, &buf); + if (size < 0) { + ret = size; + pr_err("Failed to load a boot config from initrd: %d\n", ret); + } else if (size > 0) { + ret = fstat(fd, &stat); + if (!ret) + ret = ftruncate(fd, stat.st_size + - size - 8 - BOOTCONFIG_MAGIC_LEN); + if (ret) + ret = -errno; + } /* Ignore if there is no boot config in initrd */ + + close(fd); + free(buf); + + return ret; +} + +int apply_xbc(const char *path, const char *xbc_path) +{ + u32 size, csum; + char *buf, *data; + int ret, fd; + + ret = load_xbc_file(xbc_path, &buf); + if (ret < 0) { + pr_err("Failed to load %s : %d\n", xbc_path, ret); + return ret; + } + size = strlen(buf) + 1; + csum = checksum((unsigned char *)buf, size); + + /* Prepare xbc_path data */ + data = malloc(size + 8); + if (!data) + return -ENOMEM; + strcpy(data, buf); + *(u32 *)(data + size) = size; + *(u32 *)(data + size + 4) = csum; + + /* Check the data format */ + ret = xbc_init(buf); + if (ret < 0) { + pr_err("Failed to parse %s: %d\n", xbc_path, ret); + free(data); + free(buf); + return ret; + } + printf("Apply %s to %s\n", xbc_path, path); + printf("\tNumber of nodes: %d\n", ret); + printf("\tSize: %u bytes\n", (unsigned int)size); + printf("\tChecksum: %d\n", (unsigned int)csum); + + /* TODO: Check the options by schema */ + xbc_destroy_all(); + free(buf); + + /* Remove old boot config if exists */ + ret = delete_xbc(path); + if (ret < 0) { + pr_err("Failed to delete previous boot config: %d\n", ret); + return ret; + } + + /* Apply new one */ + fd = open(path, O_RDWR | O_APPEND); + if (fd < 0) { + pr_err("Failed to open %s: %d\n", path, fd); + return fd; + } + /* TODO: Ensure the @path is initramfs/initrd image */ + ret = write(fd, data, size + 8); + if (ret < 0) { + pr_err("Failed to apply a boot config: %d\n", ret); + return ret; + } + /* Write a magic word of the bootconfig */ + ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + if (ret < 0) { + pr_err("Failed to apply a boot config magic: %d\n", ret); + return ret; + } + close(fd); + free(data); + + return 0; +} + +int usage(void) +{ + printf("Usage: bootconfig [OPTIONS] <INITRD>\n" + " Apply, delete or show boot config to initrd.\n" + " Options:\n" + " -a <config>: Apply boot config to initrd\n" + " -d : Delete boot config file from initrd\n\n" + " If no option is given, show current applied boot config.\n"); + return -1; +} + +int main(int argc, char **argv) +{ + char *path = NULL; + char *apply = NULL; + bool delete = false; + int opt; + + while ((opt = getopt(argc, argv, "hda:")) != -1) { + switch (opt) { + case 'd': + delete = true; + break; + case 'a': + apply = optarg; + break; + case 'h': + default: + return usage(); + } + } + + if (apply && delete) { + pr_err("Error: You can not specify both -a and -d at once.\n"); + return usage(); + } + + if (optind >= argc) { + pr_err("Error: No initrd is specified.\n"); + return usage(); + } + + path = argv[optind]; + + if (apply) + return apply_xbc(path, apply); + else if (delete) + return delete_xbc(path); + + return show_xbc(path); +} diff --git a/tools/bootconfig/samples/bad-array-space-comment.bconf b/tools/bootconfig/samples/bad-array-space-comment.bconf new file mode 100644 index 000000000000..fda19e47d0db --- /dev/null +++ b/tools/bootconfig/samples/bad-array-space-comment.bconf @@ -0,0 +1,5 @@ +key = # comment + "value1", # comment1 + "value2" # comment2 +, + "value3" diff --git a/tools/bootconfig/samples/bad-array.bconf b/tools/bootconfig/samples/bad-array.bconf new file mode 100644 index 000000000000..0174af019d7f --- /dev/null +++ b/tools/bootconfig/samples/bad-array.bconf @@ -0,0 +1,2 @@ +# Array must be comma separated. +key = "value1" "value2" diff --git a/tools/bootconfig/samples/bad-dotword.bconf b/tools/bootconfig/samples/bad-dotword.bconf new file mode 100644 index 000000000000..ba5557b2bdd3 --- /dev/null +++ b/tools/bootconfig/samples/bad-dotword.bconf @@ -0,0 +1,4 @@ +# do not start keyword with . +key { + .word = 1 +} diff --git a/tools/bootconfig/samples/bad-empty.bconf b/tools/bootconfig/samples/bad-empty.bconf new file mode 100644 index 000000000000..2ba3f6cc6a47 --- /dev/null +++ b/tools/bootconfig/samples/bad-empty.bconf @@ -0,0 +1 @@ +# Wrong boot config: comment only diff --git a/tools/bootconfig/samples/bad-keyerror.bconf b/tools/bootconfig/samples/bad-keyerror.bconf new file mode 100644 index 000000000000..b6e247a099d0 --- /dev/null +++ b/tools/bootconfig/samples/bad-keyerror.bconf @@ -0,0 +1,2 @@ +# key word can not contain "," +key,word diff --git a/tools/bootconfig/samples/bad-longkey.bconf b/tools/bootconfig/samples/bad-longkey.bconf new file mode 100644 index 000000000000..eb97369f91a8 --- /dev/null +++ b/tools/bootconfig/samples/bad-longkey.bconf @@ -0,0 +1 @@ +key_word_is_too_long01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 diff --git a/tools/bootconfig/samples/bad-manywords.bconf b/tools/bootconfig/samples/bad-manywords.bconf new file mode 100644 index 000000000000..8db81967c48a --- /dev/null +++ b/tools/bootconfig/samples/bad-manywords.bconf @@ -0,0 +1 @@ +key1.is2.too3.long4.5.6.7.8.9.10.11.12.13.14.15.16.17 diff --git a/tools/bootconfig/samples/bad-mixed-kv1.bconf b/tools/bootconfig/samples/bad-mixed-kv1.bconf new file mode 100644 index 000000000000..1761547dd05c --- /dev/null +++ b/tools/bootconfig/samples/bad-mixed-kv1.bconf @@ -0,0 +1,3 @@ +# value -> subkey pattern +key = value +key.subkey = another-value diff --git a/tools/bootconfig/samples/bad-mixed-kv2.bconf b/tools/bootconfig/samples/bad-mixed-kv2.bconf new file mode 100644 index 000000000000..6b32e0c3878c --- /dev/null +++ b/tools/bootconfig/samples/bad-mixed-kv2.bconf @@ -0,0 +1,3 @@ +# subkey -> value pattern +key.subkey = value +key = another-value diff --git a/tools/bootconfig/samples/bad-no-keyword.bconf b/tools/bootconfig/samples/bad-no-keyword.bconf new file mode 100644 index 000000000000..eff26808566c --- /dev/null +++ b/tools/bootconfig/samples/bad-no-keyword.bconf @@ -0,0 +1,2 @@ +# No keyword +{} diff --git a/tools/bootconfig/samples/bad-nonprintable.bconf b/tools/bootconfig/samples/bad-nonprintable.bconf new file mode 100644 index 000000000000..3bb1a2864e52 --- /dev/null +++ b/tools/bootconfig/samples/bad-nonprintable.bconf @@ -0,0 +1,2 @@ +# Non printable +key = "" diff --git a/tools/bootconfig/samples/bad-samekey.bconf b/tools/bootconfig/samples/bad-samekey.bconf new file mode 100644 index 000000000000..e8d983a4563c --- /dev/null +++ b/tools/bootconfig/samples/bad-samekey.bconf @@ -0,0 +1,6 @@ +# Same key value is not allowed +key { + foo = value + bar = value2 +} +key.foo = value diff --git a/tools/bootconfig/samples/bad-spaceword.bconf b/tools/bootconfig/samples/bad-spaceword.bconf new file mode 100644 index 000000000000..90c703d32a9a --- /dev/null +++ b/tools/bootconfig/samples/bad-spaceword.bconf @@ -0,0 +1,2 @@ +# No space between words +key . word diff --git a/tools/bootconfig/samples/bad-tree.bconf b/tools/bootconfig/samples/bad-tree.bconf new file mode 100644 index 000000000000..5a6038edcd55 --- /dev/null +++ b/tools/bootconfig/samples/bad-tree.bconf @@ -0,0 +1,5 @@ +# brace is not closing +tree { + node { + value = 1 +} diff --git a/tools/bootconfig/samples/bad-value.bconf b/tools/bootconfig/samples/bad-value.bconf new file mode 100644 index 000000000000..a1217fed86cc --- /dev/null +++ b/tools/bootconfig/samples/bad-value.bconf @@ -0,0 +1,3 @@ +# Quotes error +value = "data + diff --git a/tools/bootconfig/samples/escaped.bconf b/tools/bootconfig/samples/escaped.bconf new file mode 100644 index 000000000000..9f72043b3216 --- /dev/null +++ b/tools/bootconfig/samples/escaped.bconf @@ -0,0 +1,3 @@ +key1 = "A\B\C" +key2 = '\'\'' +key3 = "\\" diff --git a/tools/bootconfig/samples/good-array-space-comment.bconf b/tools/bootconfig/samples/good-array-space-comment.bconf new file mode 100644 index 000000000000..45b938dc0695 --- /dev/null +++ b/tools/bootconfig/samples/good-array-space-comment.bconf @@ -0,0 +1,4 @@ +key = # comment + "value1", # comment1 + "value2" , # comment2 + "value3" diff --git a/tools/bootconfig/samples/good-comment-after-value.bconf b/tools/bootconfig/samples/good-comment-after-value.bconf new file mode 100644 index 000000000000..0d92a853df72 --- /dev/null +++ b/tools/bootconfig/samples/good-comment-after-value.bconf @@ -0,0 +1 @@ +key = "value" # comment diff --git a/tools/bootconfig/samples/good-printables.bconf b/tools/bootconfig/samples/good-printables.bconf new file mode 100644 index 000000000000..ebb985a66ed8 --- /dev/null +++ b/tools/bootconfig/samples/good-printables.bconf @@ -0,0 +1,2 @@ +key = " + !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" diff --git a/tools/bootconfig/samples/good-simple.bconf b/tools/bootconfig/samples/good-simple.bconf new file mode 100644 index 000000000000..37dd6d21c176 --- /dev/null +++ b/tools/bootconfig/samples/good-simple.bconf @@ -0,0 +1,11 @@ +# A good simple bootconfig + +key.word1 = 1 +key.word2=2 +key.word3 = 3; + +key { +word4 = 4 } + +key { word5 = 5; word6 = 6 } + diff --git a/tools/bootconfig/samples/good-single.bconf b/tools/bootconfig/samples/good-single.bconf new file mode 100644 index 000000000000..98e55ad8b711 --- /dev/null +++ b/tools/bootconfig/samples/good-single.bconf @@ -0,0 +1,4 @@ +# single key style +key = 1 +key2 = 2 +key3 = "alpha", "beta" diff --git a/tools/bootconfig/samples/good-space-after-value.bconf b/tools/bootconfig/samples/good-space-after-value.bconf new file mode 100644 index 000000000000..56c15cbc5741 --- /dev/null +++ b/tools/bootconfig/samples/good-space-after-value.bconf @@ -0,0 +1 @@ +key = "value" diff --git a/tools/bootconfig/samples/good-tree.bconf b/tools/bootconfig/samples/good-tree.bconf new file mode 100644 index 000000000000..f2ddefc8b52a --- /dev/null +++ b/tools/bootconfig/samples/good-tree.bconf @@ -0,0 +1,12 @@ +key { + word { + tree { + value = "0"} + } + word2 { + tree { + value = 1,2 } + } +} +other.tree { + value = 2; value2 = 3;} diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh new file mode 100755 index 000000000000..1411f4c3454f --- /dev/null +++ b/tools/bootconfig/test-bootconfig.sh @@ -0,0 +1,126 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +echo "Boot config test script" + +BOOTCONF=./bootconfig +INITRD=`mktemp initrd-XXXX` +TEMPCONF=`mktemp temp-XXXX.bconf` +NG=0 + +cleanup() { + rm -f $INITRD $TEMPCONF $OUTFILE + exit $NG +} + +trap cleanup EXIT TERM + +NO=1 + +xpass() { # pass test command + echo "test case $NO ($3)... " + if ! ($@ && echo "\t\t[OK]"); then + echo "\t\t[NG]"; NG=$((NG + 1)) + fi + NO=$((NO + 1)) +} + +xfail() { # fail test command + echo "test case $NO ($3)... " + if ! (! $@ && echo "\t\t[OK]"); then + echo "\t\t[NG]"; NG=$((NG + 1)) + fi + NO=$((NO + 1)) +} + +echo "Basic command test" +xpass $BOOTCONF $INITRD + +echo "Delete command should success without bootconfig" +xpass $BOOTCONF -d $INITRD + +dd if=/dev/zero of=$INITRD bs=4096 count=1 +echo "key = value;" > $TEMPCONF +bconf_size=$(stat -c %s $TEMPCONF) +initrd_size=$(stat -c %s $INITRD) + +echo "Apply command test" +xpass $BOOTCONF -a $TEMPCONF $INITRD +new_size=$(stat -c %s $INITRD) + +echo "File size check" +xpass test $new_size -eq $(expr $bconf_size + $initrd_size + 9 + 12) + +echo "Apply command repeat test" +xpass $BOOTCONF -a $TEMPCONF $INITRD + +echo "File size check" +xpass test $new_size -eq $(stat -c %s $INITRD) + +echo "Delete command check" +xpass $BOOTCONF -d $INITRD + +echo "File size check" +new_size=$(stat -c %s $INITRD) +xpass test $new_size -eq $initrd_size + +echo "No error messge while applying" +OUTFILE=`mktemp tempout-XXXX` +dd if=/dev/zero of=$INITRD bs=4096 count=1 +printf " \0\0\0 \0\0\0" >> $INITRD +$BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1 +xfail grep -i "failed" $OUTFILE +xfail grep -i "error" $OUTFILE + +echo "Max node number check" + +echo -n > $TEMPCONF +for i in `seq 1 1024` ; do + echo "node$i" >> $TEMPCONF +done +xpass $BOOTCONF -a $TEMPCONF $INITRD + +echo "badnode" >> $TEMPCONF +xfail $BOOTCONF -a $TEMPCONF $INITRD + +echo "Max filesize check" + +# Max size is 32767 (including terminal byte) +echo -n "data = \"" > $TEMPCONF +dd if=/dev/urandom bs=768 count=32 | base64 -w0 >> $TEMPCONF +echo "\"" >> $TEMPCONF +xfail $BOOTCONF -a $TEMPCONF $INITRD + +truncate -s 32764 $TEMPCONF +echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') +xpass $BOOTCONF -a $TEMPCONF $INITRD + +echo "Adding same-key values" +cat > $TEMPCONF << EOF +key = bar, baz +key += qux +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xpass grep -q "bar" $OUTFILE +xpass grep -q "baz" $OUTFILE +xpass grep -q "qux" $OUTFILE + +echo "=== expected failure cases ===" +for i in samples/bad-* ; do + xfail $BOOTCONF -a $i $INITRD +done + +echo "=== expected success cases ===" +for i in samples/good-* ; do + xpass $BOOTCONF -a $i $INITRD +done + +echo +if [ $NG -eq 0 ]; then + echo "All tests passed" +else + echo "$NG tests failed" +fi diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 5535650800ab..f897eeeb0b4f 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -73,7 +73,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm -all: $(PROGS) bpftool +all: $(PROGS) bpftool runqslower $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm' $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o @@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean +clean: bpftool_clean runqslower_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* @@ -97,7 +97,7 @@ clean: bpftool_clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf $(Q)$(RM) -r -- $(OUTPUT)feature -install: $(PROGS) bpftool_install +install: $(PROGS) bpftool_install runqslower_install $(call QUIET_INSTALL, bpf_jit_disasm) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm @@ -115,4 +115,14 @@ bpftool_install: bpftool_clean: $(call descend,bpftool,clean) -.PHONY: all install clean bpftool bpftool_install bpftool_clean +runqslower: + $(call descend,runqslower) + +runqslower_install: + $(call descend,runqslower,install) + +runqslower_clean: + $(call descend,runqslower,clean) + +.PHONY: all install clean bpftool bpftool_install bpftool_clean \ + runqslower runqslower_install runqslower_clean diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst new file mode 100644 index 000000000000..94d91322895a --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -0,0 +1,305 @@ +================ +bpftool-gen +================ +------------------------------------------------------------------------------- +tool for BPF code-generation +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **gen** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMAND* := { **skeleton | **help** } + +GEN COMMANDS +============= + +| **bpftool** **gen skeleton** *FILE* +| **bpftool** **gen help** + +DESCRIPTION +=========== + **bpftool gen skeleton** *FILE* + Generate BPF skeleton C header file for a given *FILE*. + + BPF skeleton is an alternative interface to existing libbpf + APIs for working with BPF objects. Skeleton code is intended + to significantly shorten and simplify code to load and work + with BPF programs from userspace side. Generated code is + tailored to specific input BPF object *FILE*, reflecting its + structure by listing out available maps, program, variables, + etc. Skeleton eliminates the need to lookup mentioned + components by name. Instead, if skeleton instantiation + succeeds, they are populated in skeleton structure as valid + libbpf types (e.g., struct bpf_map pointer) and can be + passed to existing generic libbpf APIs. + + In addition to simple and reliable access to maps and + programs, skeleton provides a storage for BPF links (struct + bpf_link) for each BPF program within BPF object. When + requested, supported BPF programs will be automatically + attached and resulting BPF links stored for further use by + user in pre-allocated fields in skeleton struct. For BPF + programs that can't be automatically attached by libbpf, + user can attach them manually, but store resulting BPF link + in per-program link field. All such set up links will be + automatically destroyed on BPF skeleton destruction. This + eliminates the need for users to manage links manually and + rely on libbpf support to detach programs and free up + resources. + + Another facility provided by BPF skeleton is an interface to + global variables of all supported kinds: mutable, read-only, + as well as extern ones. This interface allows to pre-setup + initial values of variables before BPF object is loaded and + verified by kernel. For non-read-only variables, the same + interface can be used to fetch values of global variables on + userspace side, even if they are modified by BPF code. + + During skeleton generation, contents of source BPF object + *FILE* is embedded within generated code and is thus not + necessary to keep around. This ensures skeleton and BPF + object file are matching 1-to-1 and always stay in sync. + Generated code is dual-licensed under LGPL-2.1 and + BSD-2-Clause licenses. + + It is a design goal and guarantee that skeleton interfaces + are interoperable with generic libbpf APIs. User should + always be able to use skeleton API to create and load BPF + object, and later use libbpf APIs to keep working with + specific maps, programs, etc. + + As part of skeleton, few custom functions are generated. + Each of them is prefixed with object name, derived from + object file name. I.e., if BPF object file name is + **example.o**, BPF object name will be **example**. The + following custom functions are provided in such case: + + - **example__open** and **example__open_opts**. + These functions are used to instantiate skeleton. It + corresponds to libbpf's **bpf_object__open()** API. + **_opts** variants accepts extra **bpf_object_open_opts** + options. + + - **example__load**. + This function creates maps, loads and verifies BPF + programs, initializes global data maps. It corresponds to + libppf's **bpf_object__load** API. + + - **example__open_and_load** combines **example__open** and + **example__load** invocations in one commonly used + operation. + + - **example__attach** and **example__detach** + This pair of functions allow to attach and detach, + correspondingly, already loaded BPF object. Only BPF + programs of types supported by libbpf for auto-attachment + will be auto-attached and their corresponding BPF links + instantiated. For other BPF programs, user can manually + create a BPF link and assign it to corresponding fields in + skeleton struct. **example__detach** will detach both + links created automatically, as well as those populated by + user manually. + + - **example__destroy** + Detach and unload BPF programs, free up all the resources + used by skeleton and BPF object. + + If BPF object has global variables, corresponding structs + with memory layout corresponding to global data data section + layout will be created. Currently supported ones are: *.data*, + *.bss*, *.rodata*, and *.kconfig* structs/data sections. + These data sections/structs can be used to set up initial + values of variables, if set before **example__load**. + Afterwards, if target kernel supports memory-mapped BPF + arrays, same structs can be used to fetch and update + (non-read-only) data from userspace, with same simplicity + as for BPF side. + + **bpftool gen help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -V, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, + this option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -d, --debug + Print all logs available from libbpf, including debug-level + information. + +EXAMPLES +======== +**$ cat example.c** +:: + + #include <stdbool.h> + #include <linux/ptrace.h> + #include <linux/bpf.h> + #include "bpf_helpers.h" + + const volatile int param1 = 42; + bool global_flag = true; + struct { int x; } data = {}; + + struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, int); + __type(value, long); + } my_map SEC(".maps"); + + SEC("raw_tp/sys_enter") + int handle_sys_enter(struct pt_regs *ctx) + { + static long my_static_var; + if (global_flag) + my_static_var++; + else + data.x += param1; + return 0; + } + + SEC("raw_tp/sys_exit") + int handle_sys_exit(struct pt_regs *ctx) + { + int zero = 0; + bpf_map_lookup_elem(&my_map, &zero); + return 0; + } + +This is example BPF application with two BPF programs and a mix of BPF maps +and global variables. + +**$ bpftool gen skeleton example.o** +:: + + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + + /* THIS FILE IS AUTOGENERATED! */ + #ifndef __EXAMPLE_SKEL_H__ + #define __EXAMPLE_SKEL_H__ + + #include <stdlib.h> + #include <bpf/libbpf.h> + + struct example { + struct bpf_object_skeleton *skeleton; + struct bpf_object *obj; + struct { + struct bpf_map *rodata; + struct bpf_map *data; + struct bpf_map *bss; + struct bpf_map *my_map; + } maps; + struct { + struct bpf_program *handle_sys_enter; + struct bpf_program *handle_sys_exit; + } progs; + struct { + struct bpf_link *handle_sys_enter; + struct bpf_link *handle_sys_exit; + } links; + struct example__bss { + struct { + int x; + } data; + } *bss; + struct example__data { + _Bool global_flag; + long int handle_sys_enter_my_static_var; + } *data; + struct example__rodata { + int param1; + } *rodata; + }; + + static void example__destroy(struct example *obj); + static inline struct example *example__open_opts( + const struct bpf_object_open_opts *opts); + static inline struct example *example__open(); + static inline int example__load(struct example *obj); + static inline struct example *example__open_and_load(); + static inline int example__attach(struct example *obj); + static inline void example__detach(struct example *obj); + + #endif /* __EXAMPLE_SKEL_H__ */ + +**$ cat example_user.c** +:: + + #include "example.skel.h" + + int main() + { + struct example *skel; + int err = 0; + + skel = example__open(); + if (!skel) + goto cleanup; + + skel->rodata->param1 = 128; + + err = example__load(skel); + if (err) + goto cleanup; + + err = example__attach(skel); + if (err) + goto cleanup; + + /* all libbpf APIs are usable */ + printf("my_map name: %s\n", bpf_map__name(skel->maps.my_map)); + printf("sys_enter prog FD: %d\n", + bpf_program__fd(skel->progs.handle_sys_enter)); + + /* detach and re-attach sys_exit program */ + bpf_link__destroy(skel->links.handle_sys_exit); + skel->links.handle_sys_exit = + bpf_program__attach(skel->progs.handle_sys_exit); + + printf("my_static_var: %ld\n", + skel->bss->handle_sys_enter_my_static_var); + + cleanup: + example__destroy(skel); + return err; + } + +**# ./example_user** +:: + + my_map name: my_map + sys_enter prog FD: 8 + my_static_var: 7 + +This is a stripped-out version of skeleton generated for above example code. + +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-map**\ (8), + **bpftool-prog**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 1c0f7146aab0..cdeae8ae90ba 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -39,9 +39,9 @@ MAP COMMANDS | **bpftool** **map freeze** *MAP* | **bpftool** **map help** | -| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* } | *DATA* := { [**hex**] *BYTES* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } | *VALUE* := { *DATA* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } | *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** @@ -55,8 +55,9 @@ DESCRIPTION =========== **bpftool map { show | list }** [*MAP*] Show information about loaded maps. If *MAP* is specified - show information only about given map, otherwise list all - maps currently loaded on the system. + show information only about given maps, otherwise list all + maps currently loaded on the system. In case of **name**, + *MAP* may match several maps which will all be shown. Output will start with map ID followed by map type and zero or more named attributes (depending on kernel version). @@ -66,7 +67,8 @@ DESCRIPTION as *FILE*. **bpftool map dump** *MAP* - Dump all entries in a given *MAP*. + Dump all entries in a given *MAP*. In case of **name**, + *MAP* may match several maps which will all be dumped. **bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] Update map entry for a given *KEY*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 7a374b3c851d..64ddf8a4c518 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -33,7 +33,7 @@ PROG COMMANDS | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } | *TYPE* := { | **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | | **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** | @@ -53,8 +53,10 @@ DESCRIPTION =========== **bpftool prog { show | list }** [*PROG*] Show information about loaded programs. If *PROG* is - specified show information only about given program, otherwise - list all programs currently loaded on the system. + specified show information only about given programs, + otherwise list all programs currently loaded on the system. + In case of **tag** or **name**, *PROG* may match several + programs which will all be shown. Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). @@ -68,11 +70,15 @@ DESCRIPTION performed via the **kernel.bpf_stats_enabled** sysctl knob. **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] - Dump eBPF instructions of the program from the kernel. By + Dump eBPF instructions of the programs from the kernel. By default, eBPF will be disassembled and printed to standard output in human-readable format. In this case, **opcodes** controls if raw opcodes should be printed as well. + In case of **tag** or **name**, *PROG* may match several + programs which will all be dumped. However, if **file** or + **visual** is specified, *PROG* must match a single program. + If **file** is specified, the binary image will instead be written to *FILE*. @@ -80,15 +86,17 @@ DESCRIPTION built instead, and eBPF instructions will be presented with CFG in DOT format, on standard output. - If the prog has line_info available, the source line will + If the programs have line_info available, the source line will be displayed by default. If **linum** is specified, the filename, line number and line column will also be displayed on top of the source line. **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. + If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. + *PROG* must match a single program when **file** is specified. **opcodes** controls if raw opcodes will be printed. diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6a9c52ef84a9..34239fda69ed 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -81,4 +81,5 @@ SEE ALSO **bpftool-feature**\ (8), **bpftool-net**\ (8), **bpftool-perf**\ (8), - **bpftool-btf**\ (8) + **bpftool-btf**\ (8), + **bpftool-gen**\ (8), diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 39bc6f0f4f0b..c4e810335810 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -45,7 +45,7 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(srctree)/tools/lib/bpf \ + -I$(srctree)/tools/lib \ -I$(srctree)/tools/perf CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 70493a6da206..754d8395e451 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -59,6 +59,21 @@ _bpftool_get_map_ids_for_type() command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_map_names() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ + command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + +# Takes map type and adds matching map names to the list of suggestions. +_bpftool_get_map_names_for_type() +{ + local type="$1" + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ + command grep -C2 "$type" | \ + command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + _bpftool_get_prog_ids() { COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ @@ -71,6 +86,12 @@ _bpftool_get_prog_tags() command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_prog_names() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ + command sed -n 's/.*"name": "\(.*\)",$/\1/p' )" -- "$cur" ) ) +} + _bpftool_get_btf_ids() { COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \ @@ -180,6 +201,52 @@ _bpftool_map_update_get_id() esac } +_bpftool_map_update_get_name() +{ + local command="$1" + + # Is it the map to update, or a map to insert into the map to update? + # Search for "value" keyword. + local idx value + for (( idx=7; idx < ${#words[@]}-1; idx++ )); do + if [[ ${words[idx]} == "value" ]]; then + value=1 + break + fi + done + if [[ $value -eq 0 ]]; then + case "$command" in + push) + _bpftool_get_map_names_for_type stack + ;; + enqueue) + _bpftool_get_map_names_for_type queue + ;; + *) + _bpftool_get_map_names + ;; + esac + return 0 + fi + + # Name to complete is for a value. It can be either prog name or map name. This + # depends on the type of the map to update. + local type=$(_bpftool_map_guess_map_type) + case $type in + array_of_maps|hash_of_maps) + _bpftool_get_map_names + return 0 + ;; + prog_array) + _bpftool_get_prog_names + return 0 + ;; + *) + return 0 + ;; + esac +} + _bpftool() { local cur prev words objword @@ -251,7 +318,8 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - # Complete id, only for subcommands that use prog (but no map) ids + # Complete id and name, only for subcommands that use prog (but no + # map) ids/names. case $command in show|list|dump|pin) case $prev in @@ -259,12 +327,16 @@ _bpftool() _bpftool_get_prog_ids return 0 ;; + name) + _bpftool_get_prog_names + return 0 + ;; esac ;; esac - local PROG_TYPE='id pinned tag' - local MAP_TYPE='id pinned' + local PROG_TYPE='id pinned tag name' + local MAP_TYPE='id pinned name' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -315,6 +387,9 @@ _bpftool() id) _bpftool_get_prog_ids ;; + name) + _bpftool_get_map_names + ;; pinned) _filedir ;; @@ -335,6 +410,9 @@ _bpftool() id) _bpftool_get_map_ids ;; + name) + _bpftool_get_map_names + ;; pinned) _filedir ;; @@ -399,6 +477,10 @@ _bpftool() _bpftool_get_map_ids return 0 ;; + name) + _bpftool_get_map_names + return 0 + ;; pinned|pinmaps) _filedir return 0 @@ -447,7 +529,7 @@ _bpftool() esac ;; map) - local MAP_TYPE='id pinned' + local MAP_TYPE='id pinned name' case $command in show|list|dump|peek|pop|dequeue|freeze) case $prev in @@ -473,6 +555,24 @@ _bpftool() esac return 0 ;; + name) + case "$command" in + peek) + _bpftool_get_map_names_for_type stack + _bpftool_get_map_names_for_type queue + ;; + pop) + _bpftool_get_map_names_for_type stack + ;; + dequeue) + _bpftool_get_map_names_for_type queue + ;; + *) + _bpftool_get_map_names + ;; + esac + return 0 + ;; *) return 0 ;; @@ -520,6 +620,10 @@ _bpftool() _bpftool_get_map_ids return 0 ;; + name) + _bpftool_get_map_names + return 0 + ;; key) COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; @@ -545,6 +649,10 @@ _bpftool() _bpftool_map_update_get_id $command return 0 ;; + name) + _bpftool_map_update_get_name $command + return 0 + ;; key) COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; @@ -553,13 +661,13 @@ _bpftool() # map, depending on the type of the map to update. case "$(_bpftool_map_guess_map_type)" in array_of_maps|hash_of_maps) - local MAP_TYPE='id pinned' + local MAP_TYPE='id pinned name' COMPREPLY+=( $( compgen -W "$MAP_TYPE" \ -- "$cur" ) ) return 0 ;; prog_array) - local PROG_TYPE='id pinned tag' + local PROG_TYPE='id pinned tag name' COMPREPLY+=( $( compgen -W "$PROG_TYPE" \ -- "$cur" ) ) return 0 @@ -621,6 +729,10 @@ _bpftool() _bpftool_get_map_ids_for_type perf_event_array return 0 ;; + name) + _bpftool_get_map_names_for_type perf_event_array + return 0 + ;; cpu) return 0 ;; @@ -644,8 +756,8 @@ _bpftool() esac ;; btf) - local PROG_TYPE='id pinned tag' - local MAP_TYPE='id pinned' + local PROG_TYPE='id pinned tag name' + local MAP_TYPE='id pinned name' case $command in dump) case $prev in @@ -676,6 +788,17 @@ _bpftool() esac return 0 ;; + name) + case $pprev in + prog) + _bpftool_get_prog_names + ;; + map) + _bpftool_get_map_names + ;; + esac + return 0 + ;; format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; @@ -716,6 +839,17 @@ _bpftool() ;; esac ;; + gen) + case $command in + skeleton) + _filedir + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) ) + ;; + esac + ;; cgroup) case $command in show|list|tree) @@ -735,7 +869,7 @@ _bpftool() connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \ getsockopt setsockopt' local ATTACH_FLAGS='multi override' - local PROG_TYPE='id pinned tag' + local PROG_TYPE='id pinned tag name' case $prev in $command) _filedir @@ -760,7 +894,7 @@ _bpftool() elif [[ "$command" == "attach" ]]; then # We have an attach type on the command line, # but it is not the previous word, or - # "id|pinned|tag" (we already checked for + # "id|pinned|tag|name" (we already checked for # that). This should only leave the case when # we need attach flags for "attach" commamnd. _bpftool_one_of_list "$ATTACH_FLAGS" @@ -786,7 +920,7 @@ _bpftool() esac ;; net) - local PROG_TYPE='id pinned tag' + local PROG_TYPE='id pinned tag name' local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload' case $command in show|list) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index e5bc97b71ceb..b3745ed711ba 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -8,15 +8,15 @@ #include <stdio.h> #include <string.h> #include <unistd.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> #include <linux/btf.h> #include <linux/hashtable.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include "btf.h" #include "json_writer.h" #include "main.h" @@ -77,6 +77,20 @@ static const char *btf_var_linkage_str(__u32 linkage) } } +static const char *btf_func_linkage_str(const struct btf_type *t) +{ + switch (btf_vlen(t)) { + case BTF_FUNC_STATIC: + return "static"; + case BTF_FUNC_GLOBAL: + return "global"; + case BTF_FUNC_EXTERN: + return "extern"; + default: + return "(unknown)"; + } +} + static const char *btf_str(const struct btf *btf, __u32 off) { if (!off) @@ -231,12 +245,17 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf(" fwd_kind=%s", fwd_kind); break; } - case BTF_KIND_FUNC: - if (json_output) + case BTF_KIND_FUNC: { + const char *linkage = btf_func_linkage_str(t); + + if (json_output) { jsonw_uint_field(w, "type_id", t->type); - else - printf(" type_id=%u", t->type); + jsonw_string_field(w, "linkage", linkage); + } else { + printf(" type_id=%u linkage=%s", t->type, linkage); + } break; + } case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = (const void *)(t + 1); __u16 vlen = BTF_INFO_VLEN(t->info); @@ -370,6 +389,10 @@ static int dump_btf_c(const struct btf *btf, if (IS_ERR(d)) return PTR_ERR(d); + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); + printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); + printf("#endif\n\n"); + if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { err = btf_dump__dump_type(d, root_type_ids[i]); @@ -386,6 +409,10 @@ static int dump_btf_c(const struct btf *btf, } } + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); + printf("#pragma clang attribute pop\n"); + printf("#endif\n"); + done: btf_dump__free(d); return err; @@ -524,7 +551,7 @@ static int do_dump(int argc, char **argv) if (IS_ERR(btf)) { err = PTR_ERR(btf); btf = NULL; - p_err("failed to load BTF from %s: %s", + p_err("failed to load BTF from %s: %s", *argv, strerror(err)); goto done; } diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 397e5716ab6d..01cc52b834fa 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -8,8 +8,8 @@ #include <linux/bitops.h> #include <linux/btf.h> #include <linux/err.h> +#include <bpf/btf.h> -#include "btf.h" #include "json_writer.h" #include "main.h" diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 1ef45e55039e..62c6a1d7cd18 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -14,7 +14,7 @@ #include <sys/types.h> #include <unistd.h> -#include <bpf.h> +#include <bpf/bpf.h> #include "main.h" @@ -117,6 +117,25 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) return prog_cnt; } +static int cgroup_has_attached_progs(int cgroup_fd) +{ + enum bpf_attach_type type; + bool no_prog = true; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + int count = count_attached_bpf_progs(cgroup_fd, type); + + if (count < 0 && errno != EINVAL) + return -1; + + if (count > 0) { + no_prog = false; + break; + } + } + + return no_prog ? 0 : 1; +} static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int level) { @@ -161,6 +180,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { enum bpf_attach_type type; + int has_attached_progs; const char *path; int cgroup_fd; int ret = -1; @@ -192,6 +212,16 @@ static int do_show(int argc, char **argv) goto exit; } + has_attached_progs = cgroup_has_attached_progs(cgroup_fd); + if (has_attached_progs < 0) { + p_err("can't query bpf programs attached to %s: %s", + path, strerror(errno)); + goto exit_cgroup; + } else if (!has_attached_progs) { + ret = 0; + goto exit_cgroup; + } + if (json_output) jsonw_start_array(json_wtr); else @@ -212,6 +242,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); +exit_cgroup: close(cgroup_fd); exit: return ret; @@ -228,7 +259,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftw) { enum bpf_attach_type type; - bool skip = true; + int has_attached_progs; int cgroup_fd; if (typeflag != FTW_D) @@ -240,22 +271,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, return SHOW_TREE_FN_ERR; } - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - int count = count_attached_bpf_progs(cgroup_fd, type); - - if (count < 0 && errno != EINVAL) { - p_err("can't query bpf programs attached to %s: %s", - fpath, strerror(errno)); - close(cgroup_fd); - return SHOW_TREE_FN_ERR; - } - if (count > 0) { - skip = false; - break; - } - } - - if (skip) { + has_attached_progs = cgroup_has_attached_progs(cgroup_fd); + if (has_attached_progs < 0) { + p_err("can't query bpf programs attached to %s: %s", + fpath, strerror(errno)); + close(cgroup_fd); + return SHOW_TREE_FN_ERR; + } else if (!has_attached_progs) { close(cgroup_fd); return 0; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 88264abaa738..b75b8ec5469c 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -20,8 +20,8 @@ #include <sys/stat.h> #include <sys/vfs.h> -#include <bpf.h> -#include <libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/bpf.h> +#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ #include "main.h" diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 03bdc5b3ac49..941873d778d8 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -12,8 +12,8 @@ #include <linux/filter.h> #include <linux/limits.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include <zlib.h> #include "main.h" @@ -572,6 +572,18 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, printf("\n"); } +static void +probe_large_insn_limit(const char *define_prefix, __u32 ifindex) +{ + bool res; + + res = bpf_probe_large_insn_limit(ifindex); + print_bool_feature("have_large_insn_limit", + "Large program size limit", + "LARGE_INSN_LIMIT", + res, define_prefix); +} + static int do_probe(int argc, char **argv) { enum probe_component target = COMPONENT_UNSPEC; @@ -724,6 +736,12 @@ static int do_probe(int argc, char **argv) probe_helpers_for_progtype(i, supported_types[i], define_prefix, ifindex); + print_end_then_start_section("misc", + "Scanning miscellaneous eBPF features...", + "/*** eBPF misc features ***/", + define_prefix); + probe_large_insn_limit(define_prefix, ifindex); + exit_close_json: if (json_output) { /* End current "section" of probes */ diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c new file mode 100644 index 000000000000..f8113b3646f5 --- /dev/null +++ b/tools/bpf/bpftool/gen.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Facebook */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/err.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <bpf/btf.h> + +#include "bpf/libbpf_internal.h" +#include "json_writer.h" +#include "main.h" + + +#define MAX_OBJ_NAME_LEN 64 + +static void sanitize_identifier(char *name) +{ + int i; + + for (i = 0; name[i]; i++) + if (!isalnum(name[i]) && name[i] != '_') + name[i] = '_'; +} + +static bool str_has_suffix(const char *str, const char *suffix) +{ + size_t i, n1 = strlen(str), n2 = strlen(suffix); + + if (n1 < n2) + return false; + + for (i = 0; i < n2; i++) { + if (str[n1 - i - 1] != suffix[n2 - i - 1]) + return false; + } + + return true; +} + +static void get_obj_name(char *name, const char *file) +{ + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1); + name[MAX_OBJ_NAME_LEN - 1] = '\0'; + if (str_has_suffix(name, ".o")) + name[strlen(name) - 2] = '\0'; + sanitize_identifier(name); +} + +static void get_header_guard(char *guard, const char *obj_name) +{ + int i; + + sprintf(guard, "__%s_SKEL_H__", obj_name); + for (i = 0; guard[i]; i++) + guard[i] = toupper(guard[i]); +} + +static const char *get_map_ident(const struct bpf_map *map) +{ + const char *name = bpf_map__name(map); + + if (!bpf_map__is_internal(map)) + return name; + + if (str_has_suffix(name, ".data")) + return "data"; + else if (str_has_suffix(name, ".rodata")) + return "rodata"; + else if (str_has_suffix(name, ".bss")) + return "bss"; + else if (str_has_suffix(name, ".kconfig")) + return "kconfig"; + else + return NULL; +} + +static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args) +{ + vprintf(fmt, args); +} + +static int codegen_datasec_def(struct bpf_object *obj, + struct btf *btf, + struct btf_dump *d, + const struct btf_type *sec, + const char *obj_name) +{ + const char *sec_name = btf__name_by_offset(btf, sec->name_off); + const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); + int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); + const char *sec_ident; + char var_ident[256]; + + if (strcmp(sec_name, ".data") == 0) + sec_ident = "data"; + else if (strcmp(sec_name, ".bss") == 0) + sec_ident = "bss"; + else if (strcmp(sec_name, ".rodata") == 0) + sec_ident = "rodata"; + else if (strcmp(sec_name, ".kconfig") == 0) + sec_ident = "kconfig"; + else + return 0; + + printf(" struct %s__%s {\n", obj_name, sec_ident); + for (i = 0; i < vlen; i++, sec_var++) { + const struct btf_type *var = btf__type_by_id(btf, sec_var->type); + const char *var_name = btf__name_by_offset(btf, var->name_off); + DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, + .field_name = var_ident, + .indent_level = 2, + ); + int need_off = sec_var->offset, align_off, align; + __u32 var_type_id = var->type; + const struct btf_type *t; + + t = btf__type_by_id(btf, var_type_id); + while (btf_is_mod(t)) { + var_type_id = t->type; + t = btf__type_by_id(btf, var_type_id); + } + + if (off > need_off) { + p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n", + sec_name, i, need_off, off); + return -EINVAL; + } + + align = btf__align_of(btf, var->type); + if (align <= 0) { + p_err("Failed to determine alignment of variable '%s': %d", + var_name, align); + return -EINVAL; + } + + align_off = (off + align - 1) / align * align; + if (align_off != need_off) { + printf("\t\tchar __pad%d[%d];\n", + pad_cnt, need_off - off); + pad_cnt++; + } + + /* sanitize variable name, e.g., for static vars inside + * a function, it's name is '<function name>.<variable name>', + * which we'll turn into a '<function name>_<variable name>' + */ + var_ident[0] = '\0'; + strncat(var_ident, var_name, sizeof(var_ident) - 1); + sanitize_identifier(var_ident); + + printf("\t\t"); + err = btf_dump__emit_type_decl(d, var_type_id, &opts); + if (err) + return err; + printf(";\n"); + + off = sec_var->offset + sec_var->size; + } + printf(" } *%s;\n", sec_ident); + return 0; +} + +static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) +{ + struct btf *btf = bpf_object__btf(obj); + int n = btf__get_nr_types(btf); + struct btf_dump *d; + int i, err = 0; + + d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); + if (IS_ERR(d)) + return PTR_ERR(d); + + for (i = 1; i <= n; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) + continue; + + err = codegen_datasec_def(obj, btf, d, t, obj_name); + if (err) + goto out; + } +out: + btf_dump__free(d); + return err; +} + +static int codegen(const char *template, ...) +{ + const char *src, *end; + int skip_tabs = 0, n; + char *s, *dst; + va_list args; + char c; + + n = strlen(template); + s = malloc(n + 1); + if (!s) + return -ENOMEM; + src = template; + dst = s; + + /* find out "baseline" indentation to skip */ + while ((c = *src++)) { + if (c == '\t') { + skip_tabs++; + } else if (c == '\n') { + break; + } else { + p_err("unrecognized character at pos %td in template '%s'", + src - template - 1, template); + return -EINVAL; + } + } + + while (*src) { + /* skip baseline indentation tabs */ + for (n = skip_tabs; n > 0; n--, src++) { + if (*src != '\t') { + p_err("not enough tabs at pos %td in template '%s'", + src - template - 1, template); + return -EINVAL; + } + } + /* trim trailing whitespace */ + end = strchrnul(src, '\n'); + for (n = end - src; n > 0 && isspace(src[n - 1]); n--) + ; + memcpy(dst, src, n); + dst += n; + if (*end) + *dst++ = '\n'; + src = *end ? end + 1 : end; + } + *dst++ = '\0'; + + /* print out using adjusted template */ + va_start(args, template); + n = vprintf(s, args); + va_end(args); + + free(s); + return n; +} + +static int do_skeleton(int argc, char **argv) +{ + char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; + size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + char obj_name[MAX_OBJ_NAME_LEN], *obj_data; + struct bpf_object *obj = NULL; + const char *file, *ident; + struct bpf_program *prog; + int fd, len, err = -1; + struct bpf_map *map; + struct btf *btf; + struct stat st; + + if (!REQ_ARGS(1)) { + usage(); + return -1; + } + file = GET_ARG(); + + if (argc) { + p_err("extra unknown arguments"); + return -1; + } + + if (stat(file, &st)) { + p_err("failed to stat() %s: %s", file, strerror(errno)); + return -1; + } + file_sz = st.st_size; + mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE)); + fd = open(file, O_RDONLY); + if (fd < 0) { + p_err("failed to open() %s: %s", file, strerror(errno)); + return -1; + } + obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0); + if (obj_data == MAP_FAILED) { + obj_data = NULL; + p_err("failed to mmap() %s: %s", file, strerror(errno)); + goto out; + } + get_obj_name(obj_name, file); + opts.object_name = obj_name; + obj = bpf_object__open_mem(obj_data, file_sz, &opts); + if (IS_ERR(obj)) { + obj = NULL; + p_err("failed to open BPF object file: %ld", PTR_ERR(obj)); + goto out; + } + + bpf_object__for_each_map(map, obj) { + ident = get_map_ident(map); + if (!ident) { + p_err("ignoring unrecognized internal map '%s'...", + bpf_map__name(map)); + continue; + } + map_cnt++; + } + bpf_object__for_each_program(prog, obj) { + prog_cnt++; + } + + get_header_guard(header_guard, obj_name); + codegen("\ + \n\ + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ + \n\ + /* THIS FILE IS AUTOGENERATED! */ \n\ + #ifndef %2$s \n\ + #define %2$s \n\ + \n\ + #include <stdlib.h> \n\ + #include <bpf/libbpf.h> \n\ + \n\ + struct %1$s { \n\ + struct bpf_object_skeleton *skeleton; \n\ + struct bpf_object *obj; \n\ + ", + obj_name, header_guard + ); + + if (map_cnt) { + printf("\tstruct {\n"); + bpf_object__for_each_map(map, obj) { + ident = get_map_ident(map); + if (!ident) + continue; + printf("\t\tstruct bpf_map *%s;\n", ident); + } + printf("\t} maps;\n"); + } + + if (prog_cnt) { + printf("\tstruct {\n"); + bpf_object__for_each_program(prog, obj) { + printf("\t\tstruct bpf_program *%s;\n", + bpf_program__name(prog)); + } + printf("\t} progs;\n"); + printf("\tstruct {\n"); + bpf_object__for_each_program(prog, obj) { + printf("\t\tstruct bpf_link *%s;\n", + bpf_program__name(prog)); + } + printf("\t} links;\n"); + } + + btf = bpf_object__btf(obj); + if (btf) { + err = codegen_datasecs(obj, obj_name); + if (err) + goto out; + } + + codegen("\ + \n\ + }; \n\ + \n\ + static void \n\ + %1$s__destroy(struct %1$s *obj) \n\ + { \n\ + if (!obj) \n\ + return; \n\ + if (obj->skeleton) \n\ + bpf_object__destroy_skeleton(obj->skeleton);\n\ + free(obj); \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__create_skeleton(struct %1$s *obj); \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open_opts(const struct bpf_object_open_opts *opts) \n\ + { \n\ + struct %1$s *obj; \n\ + \n\ + obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\ + if (!obj) \n\ + return NULL; \n\ + if (%1$s__create_skeleton(obj)) \n\ + goto err; \n\ + if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\ + goto err; \n\ + \n\ + return obj; \n\ + err: \n\ + %1$s__destroy(obj); \n\ + return NULL; \n\ + } \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open(void) \n\ + { \n\ + return %1$s__open_opts(NULL); \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__load(struct %1$s *obj) \n\ + { \n\ + return bpf_object__load_skeleton(obj->skeleton); \n\ + } \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open_and_load(void) \n\ + { \n\ + struct %1$s *obj; \n\ + \n\ + obj = %1$s__open(); \n\ + if (!obj) \n\ + return NULL; \n\ + if (%1$s__load(obj)) { \n\ + %1$s__destroy(obj); \n\ + return NULL; \n\ + } \n\ + return obj; \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__attach(struct %1$s *obj) \n\ + { \n\ + return bpf_object__attach_skeleton(obj->skeleton); \n\ + } \n\ + \n\ + static inline void \n\ + %1$s__detach(struct %1$s *obj) \n\ + { \n\ + return bpf_object__detach_skeleton(obj->skeleton); \n\ + } \n\ + ", + obj_name + ); + + codegen("\ + \n\ + \n\ + static inline int \n\ + %1$s__create_skeleton(struct %1$s *obj) \n\ + { \n\ + struct bpf_object_skeleton *s; \n\ + \n\ + s = (typeof(s))calloc(1, sizeof(*s)); \n\ + if (!s) \n\ + return -1; \n\ + obj->skeleton = s; \n\ + \n\ + s->sz = sizeof(*s); \n\ + s->name = \"%1$s\"; \n\ + s->obj = &obj->obj; \n\ + ", + obj_name + ); + if (map_cnt) { + codegen("\ + \n\ + \n\ + /* maps */ \n\ + s->map_cnt = %zu; \n\ + s->map_skel_sz = sizeof(*s->maps); \n\ + s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\ + if (!s->maps) \n\ + goto err; \n\ + ", + map_cnt + ); + i = 0; + bpf_object__for_each_map(map, obj) { + ident = get_map_ident(map); + + if (!ident) + continue; + + codegen("\ + \n\ + \n\ + s->maps[%zu].name = \"%s\"; \n\ + s->maps[%zu].map = &obj->maps.%s; \n\ + ", + i, bpf_map__name(map), i, ident); + /* memory-mapped internal maps */ + if (bpf_map__is_internal(map) && + (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) { + printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", + i, ident); + } + i++; + } + } + if (prog_cnt) { + codegen("\ + \n\ + \n\ + /* programs */ \n\ + s->prog_cnt = %zu; \n\ + s->prog_skel_sz = sizeof(*s->progs); \n\ + s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\ + if (!s->progs) \n\ + goto err; \n\ + ", + prog_cnt + ); + i = 0; + bpf_object__for_each_program(prog, obj) { + codegen("\ + \n\ + \n\ + s->progs[%1$zu].name = \"%2$s\"; \n\ + s->progs[%1$zu].prog = &obj->progs.%2$s;\n\ + s->progs[%1$zu].link = &obj->links.%2$s;\n\ + ", + i, bpf_program__name(prog)); + i++; + } + } + codegen("\ + \n\ + \n\ + s->data_sz = %d; \n\ + s->data = (void *)\"\\ \n\ + ", + file_sz); + + /* embed contents of BPF object file */ + for (i = 0, len = 0; i < file_sz; i++) { + int w = obj_data[i] ? 4 : 2; + + len += w; + if (len > 78) { + printf("\\\n"); + len = w; + } + if (!obj_data[i]) + printf("\\0"); + else + printf("\\x%02x", (unsigned char)obj_data[i]); + } + + codegen("\ + \n\ + \"; \n\ + \n\ + return 0; \n\ + err: \n\ + bpf_object__destroy_skeleton(s); \n\ + return -1; \n\ + } \n\ + \n\ + #endif /* %s */ \n\ + ", + header_guard); + err = 0; +out: + bpf_object__close(obj); + if (obj_data) + munmap(obj_data, mmap_sz); + close(fd); + return err; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %1$s gen skeleton FILE\n" + " %1$s gen help\n" + "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name); + + return 0; +} + +static const struct cmd cmds[] = { + { "skeleton", do_skeleton }, + { "help", do_help }, + { 0 } +}; + +int do_gen(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index bfed711258ce..f7f5885aa3ba 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -24,7 +24,7 @@ #include <dis-asm.h> #include <sys/stat.h> #include <limits.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include "json_writer.h" #include "main.h" diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 4764581ff9ea..6d41bbfc6459 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -9,8 +9,8 @@ #include <stdlib.h> #include <string.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include "main.h" @@ -58,7 +58,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -227,6 +227,7 @@ static const struct cmd cmds[] = { { "net", do_net }, { "feature", do_feature }, { "btf", do_btf }, + { "gen", do_gen }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 2899095f8254..4e75b58d3989 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -42,12 +42,12 @@ #define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" #define HELP_SPEC_PROGRAM \ - "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" + "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ "\t {-m|--mapcompat} | {-n|--nomount} }" #define HELP_SPEC_MAP \ - "MAP := { id MAP_ID | pinned FILE }" + "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" static const char * const prog_type_name[] = { [BPF_PROG_TYPE_UNSPEC] = "unspec", @@ -155,6 +155,7 @@ int do_net(int argc, char **arg); int do_tracelog(int argc, char **arg); int do_feature(int argc, char **argv); int do_btf(int argc, char **argv); +int do_gen(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index de61d73b9030..e6c85680b34d 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -15,9 +15,9 @@ #include <sys/types.h> #include <sys/stat.h> -#include <bpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> -#include "btf.h" #include "json_writer.h" #include "main.h" @@ -48,6 +48,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_QUEUE] = "queue", [BPF_MAP_TYPE_STACK] = "stack", [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -91,10 +92,66 @@ static void *alloc_value(struct bpf_map_info *info) return malloc(info->value_size); } -int map_parse_fd(int *argc, char ***argv) +static int map_fd_by_name(char *name, int **fds) { - int fd; + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get map info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +static int map_parse_fds(int *argc, char ***argv, int **fds) +{ if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -108,10 +165,25 @@ int map_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - fd = bpf_map_get_fd_by_id(id); - if (fd < 0) + (*fds)[0] = bpf_map_get_fd_by_id(id); + if ((*fds)[0] < 0) { p_err("get map by id (%u): %s", id, strerror(errno)); - return fd; + return -1; + } + return 1; + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return map_fd_by_name(name, fds); } else if (is_prefix(**argv, "pinned")) { char *path; @@ -120,13 +192,43 @@ int map_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_MAP); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + if ((*fds)[0] < 0) + return -1; + return 1; } - p_err("expected 'id' or 'pinned', got: '%s'?", **argv); + p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); return -1; } +int map_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several maps match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) { int err; @@ -150,6 +252,7 @@ static int do_dump_btf(const struct btf_dumper *d, struct bpf_map_info *map_info, void *key, void *value) { + __u32 value_id; int ret; /* start of key-value pair */ @@ -163,9 +266,12 @@ static int do_dump_btf(const struct btf_dumper *d, goto err_end_obj; } + value_id = map_info->btf_vmlinux_value_type_id ? + : map_info->btf_value_type_id; + if (!map_is_per_cpu(map_info->type)) { jsonw_name(d->jw, "value"); - ret = btf_dumper_type(d, map_info->btf_value_type_id, value); + ret = btf_dumper_type(d, value_id, value); } else { unsigned int i, n, step; @@ -177,8 +283,7 @@ static int do_dump_btf(const struct btf_dumper *d, jsonw_start_object(d->jw); jsonw_int_field(d->jw, "cpu", i); jsonw_name(d->jw, "value"); - ret = btf_dumper_type(d, map_info->btf_value_type_id, - value + i * step); + ret = btf_dumper_type(d, value_id, value + i * step); jsonw_end_object(d->jw); if (ret) break; @@ -479,6 +584,21 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; } +static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) +{ + jsonw_uint_field(wtr, "id", info->id); + if (info->type < ARRAY_SIZE(map_type_name)) + jsonw_string_field(wtr, "type", map_type_name[info->type]); + else + jsonw_uint_field(wtr, "type", info->type); + + if (*info->name) + jsonw_string_field(wtr, "name", info->name); + + jsonw_name(wtr, "flags"); + jsonw_printf(wtr, "%d", info->map_flags); +} + static int show_map_close_json(int fd, struct bpf_map_info *info) { char *memlock, *frozen_str; @@ -489,18 +609,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_start_object(json_wtr); - jsonw_uint_field(json_wtr, "id", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - jsonw_string_field(json_wtr, "type", - map_type_name[info->type]); - else - jsonw_uint_field(json_wtr, "type", info->type); - - if (*info->name) - jsonw_string_field(json_wtr, "name", info->name); - - jsonw_name(json_wtr, "flags"); - jsonw_printf(json_wtr, "%d", info->map_flags); + show_map_header_json(info, json_wtr); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); @@ -561,14 +670,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) return 0; } -static int show_map_close_plain(int fd, struct bpf_map_info *info) +static void show_map_header_plain(struct bpf_map_info *info) { - char *memlock, *frozen_str; - int frozen = 0; - - memlock = get_fdinfo(fd, "memlock"); - frozen_str = get_fdinfo(fd, "frozen"); - printf("%u: ", info->id); if (info->type < ARRAY_SIZE(map_type_name)) printf("%s ", map_type_name[info->type]); @@ -581,6 +684,17 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("flags 0x%x", info->map_flags); print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("\n"); +} + +static int show_map_close_plain(int fd, struct bpf_map_info *info) +{ + char *memlock, *frozen_str; + int frozen = 0; + + memlock = get_fdinfo(fd, "memlock"); + frozen_str = get_fdinfo(fd, "frozen"); + + show_map_header_plain(info); printf("\tkey %uB value %uB max_entries %u", info->key_size, info->value_size, info->max_entries); @@ -642,6 +756,50 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) return 0; } +static int do_show_subset(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int *fds = NULL; + int nb_fds, i; + int err = -1; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + err = bpf_obj_get_info_by_fd(fds[i], &info, &len); + if (err) { + p_err("can't get map info: %s", + strerror(errno)); + for (; i < nb_fds; i++) + close(fds[i]); + break; + } + + if (json_output) + show_map_close_json(fds[i], &info); + else + show_map_close_plain(fds[i], &info); + + close(fds[i]); + } + if (json_output && nb_fds > 1) + jsonw_end_array(json_wtr); /* root array */ + +exit_free: + free(fds); + return err; +} + static int do_show(int argc, char **argv) { struct bpf_map_info info = {}; @@ -653,16 +811,8 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&map_table, BPF_OBJ_MAP); - if (argc == 2) { - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); - if (fd < 0) - return -1; - - if (json_output) - return show_map_close_json(fd, &info); - else - return show_map_close_plain(fd, &info); - } + if (argc == 2) + return do_show_subset(argc, argv); if (argc) return BAD_ARG(); @@ -765,26 +915,75 @@ static int dump_map_elem(int fd, void *key, void *value, return 0; } -static int do_dump(int argc, char **argv) +static int maps_have_btf(int *fds, int nb_fds) { struct bpf_map_info info = {}; - void *key, *value, *prev_key; - unsigned int num_elems = 0; __u32 len = sizeof(info); - json_writer_t *btf_wtr; + int err, i; + + for (i = 0; i < nb_fds; i++) { + err = bpf_obj_get_info_by_fd(fds[i], &info, &len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + return -1; + } + + if (!info.btf_id) + return 0; + } + + return 1; +} + +static struct btf *btf_vmlinux; + +static struct btf *get_map_kv_btf(const struct bpf_map_info *info) +{ struct btf *btf = NULL; - int err; - int fd; - if (argc != 2) - usage(); + if (info->btf_vmlinux_value_type_id) { + if (!btf_vmlinux) { + btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(btf_vmlinux)) + p_err("failed to get kernel btf"); + } + return btf_vmlinux; + } else if (info->btf_value_type_id) { + int err; + + err = btf__get_from_id(info->btf_id, &btf); + if (err || !btf) { + p_err("failed to get btf"); + btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH); + } + } - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); - if (fd < 0) - return -1; + return btf; +} - key = malloc(info.key_size); - value = alloc_value(&info); +static void free_map_kv_btf(struct btf *btf) +{ + if (!IS_ERR(btf) && btf != btf_vmlinux) + btf__free(btf); +} + +static void free_btf_vmlinux(void) +{ + if (!IS_ERR(btf_vmlinux)) + btf__free(btf_vmlinux); +} + +static int +map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, + bool show_header) +{ + void *key, *value, *prev_key; + unsigned int num_elems = 0; + struct btf *btf = NULL; + int err; + + key = malloc(info->key_size); + value = alloc_value(info); if (!key || !value) { p_err("mem alloc failed"); err = -1; @@ -793,30 +992,27 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = btf__get_from_id(info.btf_id, &btf); - if (err) { - p_err("failed to get btf"); - goto exit_free; - } + if (wtr) { + btf = get_map_kv_btf(info); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); + goto exit_free; + } - if (json_output) - jsonw_start_array(json_wtr); - else - if (btf) { - btf_wtr = get_btf_writer(); - if (!btf_wtr) { - p_info("failed to create json writer for btf. falling back to plain output"); - btf__free(btf); - btf = NULL; - } else { - jsonw_start_array(btf_wtr); - } + if (show_header) { + jsonw_start_object(wtr); /* map object */ + show_map_header_json(info, wtr); + jsonw_name(wtr, "elements"); } + jsonw_start_array(wtr); /* elements */ + } else if (show_header) { + show_map_header_plain(info); + } - if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && - info.value_size != 8) + if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && + info->value_size != 8) p_info("Warning: cannot read values from %s map with value_size != 8", - map_type_name[info.type]); + map_type_name[info->type]); while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { @@ -824,15 +1020,14 @@ static int do_dump(int argc, char **argv) err = 0; break; } - num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr); + num_elems += dump_map_elem(fd, key, value, info, btf, wtr); prev_key = key; } - if (json_output) - jsonw_end_array(json_wtr); - else if (btf) { - jsonw_end_array(btf_wtr); - jsonw_destroy(&btf_wtr); + if (wtr) { + jsonw_end_array(wtr); /* elements */ + if (show_header) + jsonw_end_object(wtr); /* map object */ } else { printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : ""); @@ -842,11 +1037,78 @@ exit_free: free(key); free(value); close(fd); - btf__free(btf); + free_map_kv_btf(btf); return err; } +static int do_dump(int argc, char **argv) +{ + json_writer_t *wtr = NULL, *btf_wtr = NULL; + struct bpf_map_info info = {}; + int nb_fds, i = 0; + __u32 len = sizeof(info); + int *fds = NULL; + int err = -1; + + if (argc != 2) + usage(); + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (json_output) { + wtr = json_wtr; + } else { + int do_plain_btf; + + do_plain_btf = maps_have_btf(fds, nb_fds); + if (do_plain_btf < 0) + goto exit_close; + + if (do_plain_btf) { + btf_wtr = get_btf_writer(); + wtr = btf_wtr; + if (!btf_wtr) + p_info("failed to create json writer for btf. falling back to plain output"); + } + } + + if (wtr && nb_fds > 1) + jsonw_start_array(wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + if (bpf_obj_get_info_by_fd(fds[i], &info, &len)) { + p_err("can't get map info: %s", strerror(errno)); + break; + } + err = map_dump(fds[i], &info, wtr, nb_fds > 1); + if (!wtr && i != nb_fds - 1) + printf("\n"); + + if (err) + break; + close(fds[i]); + } + if (wtr && nb_fds > 1) + jsonw_end_array(wtr); /* root array */ + + if (btf_wtr) + jsonw_destroy(&btf_wtr); +exit_close: + for (; i < nb_fds; i++) + close(fds[i]); +exit_free: + free(fds); + free_btf_vmlinux(); + return err; +} + static int alloc_key_value(struct bpf_map_info *info, void **key, void **value) { *key = NULL; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 4c5531d1a450..d9b29c17fbb8 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -6,7 +6,7 @@ */ #include <errno.h> #include <fcntl.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include <poll.h> #include <signal.h> #include <stdbool.h> @@ -21,7 +21,7 @@ #include <sys/mman.h> #include <sys/syscall.h> -#include <bpf.h> +#include <bpf/bpf.h> #include <perf-sys.h> #include "main.h" diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 4f52d3151616..c5e3895b7c8b 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -7,7 +7,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include <net/if.h> #include <linux/if.h> #include <linux/rtnetlink.h> @@ -16,8 +17,8 @@ #include <sys/stat.h> #include <sys/types.h> -#include <bpf.h> -#include <nlattr.h> +#include "bpf/nlattr.h" +#include "bpf/libbpf_internal.h" #include "main.h" #include "netlink_dumper.h" diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c index 550a0f537eed..5f65140b003b 100644 --- a/tools/bpf/bpftool/netlink_dumper.c +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -3,11 +3,11 @@ #include <stdlib.h> #include <string.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include <linux/rtnetlink.h> #include <linux/tc_act/tc_bpf.h> -#include <nlattr.h> +#include "bpf/nlattr.h" #include "main.h" #include "netlink_dumper.h" diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index b2046f33e23f..3341aa14acda 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -13,7 +13,7 @@ #include <unistd.h> #include <ftw.h> -#include <bpf.h> +#include <bpf/bpf.h> #include "main.h" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2ce9c5ba1934..b352ab041160 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -17,14 +17,19 @@ #include <linux/err.h> #include <linux/sizes.h> -#include <bpf.h> -#include <btf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> #include "cfg.h" #include "main.h" #include "xlated_dumper.h" +enum dump_mode { + DUMP_JITED, + DUMP_XLATED, +}; + static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", @@ -77,11 +82,12 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) strftime(buf, size, "%FT%T%z", &load_tm); } -static int prog_fd_by_tag(unsigned char *tag) +static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) { unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; int err; - int fd; while (true) { struct bpf_prog_info info = {}; @@ -89,36 +95,54 @@ static int prog_fd_by_tag(unsigned char *tag) err = bpf_prog_get_next_id(id, &id); if (err) { - p_err("%s", strerror(errno)); - return -1; + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; } fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { p_err("can't get prog by id (%u): %s", id, strerror(errno)); - return -1; + goto err_close_fds; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get prog info (%u): %s", id, strerror(errno)); - close(fd); - return -1; + goto err_close_fd; } - if (!memcmp(tag, info.tag, BPF_TAG_SIZE)) - return fd; + if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || + (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + close(fd); + continue; + } - close(fd); + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; } -int prog_parse_fd(int *argc, char ***argv) +static int prog_parse_fds(int *argc, char ***argv, int **fds) { - int fd; - if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -132,10 +156,12 @@ int prog_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - fd = bpf_prog_get_fd_by_id(id); - if (fd < 0) + (*fds)[0] = bpf_prog_get_fd_by_id(id); + if ((*fds)[0] < 0) { p_err("get by id (%u): %s", id, strerror(errno)); - return fd; + return -1; + } + return 1; } else if (is_prefix(**argv, "tag")) { unsigned char tag[BPF_TAG_SIZE]; @@ -149,7 +175,20 @@ int prog_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - return prog_fd_by_tag(tag); + return prog_fd_by_nametag(tag, fds, true); + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(name, fds, false); } else if (is_prefix(**argv, "pinned")) { char *path; @@ -158,13 +197,43 @@ int prog_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_PROG); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + if ((*fds)[0] < 0) + return -1; + return 1; } - p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv); + p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); return -1; } +int prog_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several programs match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + static void show_prog_maps(int fd, u32 num_maps) { struct bpf_prog_info info = {}; @@ -194,11 +263,8 @@ static void show_prog_maps(int fd, u32 num_maps) } } -static void print_prog_json(struct bpf_prog_info *info, int fd) +static void print_prog_header_json(struct bpf_prog_info *info) { - char *memlock; - - jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) jsonw_string_field(json_wtr, "type", @@ -219,7 +285,14 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns); jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt); } +} +static void print_prog_json(struct bpf_prog_info *info, int fd) +{ + char *memlock; + + jsonw_start_object(json_wtr); + print_prog_header_json(info); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); if (info->load_time) { @@ -268,10 +341,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_object(json_wtr); } -static void print_prog_plain(struct bpf_prog_info *info, int fd) +static void print_prog_header_plain(struct bpf_prog_info *info) { - char *memlock; - printf("%u: ", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) printf("%s ", prog_type_name[info->type]); @@ -289,6 +360,13 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf(" run_time_ns %lld run_cnt %lld", info->run_time_ns, info->run_cnt); printf("\n"); +} + +static void print_prog_plain(struct bpf_prog_info *info, int fd) +{ + char *memlock; + + print_prog_header_plain(info); if (info->load_time) { char buf[32]; @@ -349,6 +427,40 @@ static int show_prog(int fd) return 0; } +static int do_show_subset(int argc, char **argv) +{ + int *fds = NULL; + int nb_fds, i; + int err = -1; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + err = show_prog(fds[i]); + if (err) { + for (; i < nb_fds; i++) + close(fds[i]); + break; + } + close(fds[i]); + } + if (json_output && nb_fds > 1) + jsonw_end_array(json_wtr); /* root array */ + +exit_free: + free(fds); + return err; +} + static int do_show(int argc, char **argv) { __u32 id = 0; @@ -358,15 +470,8 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); - if (argc == 2) { - fd = prog_parse_fd(&argc, &argv); - if (fd < 0) - return -1; - - err = show_prog(fd); - close(fd); - return err; - } + if (argc == 2) + return do_show_subset(argc, argv); if (argc) return BAD_ARG(); @@ -408,101 +513,32 @@ static int do_show(int argc, char **argv) return err; } -static int do_dump(int argc, char **argv) +static int +prog_dump(struct bpf_prog_info *info, enum dump_mode mode, + char *filepath, bool opcodes, bool visual, bool linum) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; - enum {DUMP_JITED, DUMP_XLATED} mode; const char *disasm_opt = NULL; - struct bpf_prog_info *info; struct dump_data dd = {}; void *func_info = NULL; struct btf *btf = NULL; - char *filepath = NULL; - bool opcodes = false; - bool visual = false; char func_sig[1024]; unsigned char *buf; - bool linum = false; __u32 member_len; - __u64 arrays; ssize_t n; int fd; - if (is_prefix(*argv, "jited")) { - if (disasm_init()) - return -1; - mode = DUMP_JITED; - } else if (is_prefix(*argv, "xlated")) { - mode = DUMP_XLATED; - } else { - p_err("expected 'xlated' or 'jited', got: %s", *argv); - return -1; - } - NEXT_ARG(); - - if (argc < 2) - usage(); - - fd = prog_parse_fd(&argc, &argv); - if (fd < 0) - return -1; - - if (is_prefix(*argv, "file")) { - NEXT_ARG(); - if (!argc) { - p_err("expected file path"); - return -1; - } - - filepath = *argv; - NEXT_ARG(); - } else if (is_prefix(*argv, "opcodes")) { - opcodes = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "visual")) { - visual = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "linum")) { - linum = true; - NEXT_ARG(); - } - - if (argc) { - usage(); - return -1; - } - - if (mode == DUMP_JITED) - arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; - else - arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; - - arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - - info_linear = bpf_program__get_prog_info_linear(fd, arrays); - close(fd); - if (IS_ERR_OR_NULL(info_linear)) { - p_err("can't get prog info: %s", strerror(errno)); - return -1; - } - - info = &info_linear->info; if (mode == DUMP_JITED) { if (info->jited_prog_len == 0 || !info->jited_prog_insns) { p_info("no instructions returned"); - goto err_free; + return -1; } buf = (unsigned char *)(info->jited_prog_insns); member_len = info->jited_prog_len; } else { /* DUMP_XLATED */ - if (info->xlated_prog_len == 0) { + if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) { p_err("error retrieving insn dump: kernel.kptr_restrict set?"); - goto err_free; + return -1; } buf = (unsigned char *)info->xlated_prog_insns; member_len = info->xlated_prog_len; @@ -510,7 +546,7 @@ static int do_dump(int argc, char **argv) if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { p_err("failed to get btf"); - goto err_free; + return -1; } func_info = (void *)info->func_info; @@ -526,7 +562,7 @@ static int do_dump(int argc, char **argv) if (fd < 0) { p_err("can't open file %s: %s", filepath, strerror(errno)); - goto err_free; + return -1; } n = write(fd, buf, member_len); @@ -534,7 +570,7 @@ static int do_dump(int argc, char **argv) if (n != member_len) { p_err("error writing output file: %s", n < 0 ? strerror(errno) : "short write"); - goto err_free; + return -1; } if (json_output) @@ -548,7 +584,7 @@ static int do_dump(int argc, char **argv) info->netns_ino, &disasm_opt); if (!name) - goto err_free; + return -1; } if (info->nr_jited_func_lens && info->jited_func_lens) { @@ -643,12 +679,130 @@ static int do_dump(int argc, char **argv) kernel_syms_destroy(&dd); } - free(info_linear); return 0; +} -err_free: - free(info_linear); - return -1; +static int do_dump(int argc, char **argv) +{ + struct bpf_prog_info_linear *info_linear; + char *filepath = NULL; + bool opcodes = false; + bool visual = false; + enum dump_mode mode; + bool linum = false; + int *fds = NULL; + int nb_fds, i = 0; + int err = -1; + __u64 arrays; + + if (is_prefix(*argv, "jited")) { + if (disasm_init()) + return -1; + mode = DUMP_JITED; + } else if (is_prefix(*argv, "xlated")) { + mode = DUMP_XLATED; + } else { + p_err("expected 'xlated' or 'jited', got: %s", *argv); + return -1; + } + NEXT_ARG(); + + if (argc < 2) + usage(); + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (is_prefix(*argv, "file")) { + NEXT_ARG(); + if (!argc) { + p_err("expected file path"); + goto exit_close; + } + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } + + filepath = *argv; + NEXT_ARG(); + } else if (is_prefix(*argv, "opcodes")) { + opcodes = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "visual")) { + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } + + visual = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); + } + + if (argc) { + usage(); + goto exit_close; + } + + if (mode == DUMP_JITED) + arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; + else + arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; + + arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("can't get prog info: %s", strerror(errno)); + break; + } + + if (json_output && nb_fds > 1) { + jsonw_start_object(json_wtr); /* prog object */ + print_prog_header_json(&info_linear->info); + jsonw_name(json_wtr, "insns"); + } else if (nb_fds > 1) { + print_prog_header_plain(&info_linear->info); + } + + err = prog_dump(&info_linear->info, mode, filepath, opcodes, + visual, linum); + + if (json_output && nb_fds > 1) + jsonw_end_object(json_wtr); /* prog object */ + else if (i != nb_fds - 1 && nb_fds > 1) + printf("\n"); + + free(info_linear); + if (err) + break; + close(fds[i]); + } + if (json_output && nb_fds > 1) + jsonw_end_array(json_wtr); /* root array */ + +exit_close: + for (; i < nb_fds; i++) + close(fds[i]); +exit_free: + free(fds); + return err; } static int do_pin(int argc, char **argv) diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 5b91ee65a080..8608cd68cdd0 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -7,7 +7,7 @@ #include <stdlib.h> #include <string.h> #include <sys/types.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include "disasm.h" #include "json_writer.h" diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore new file mode 100644 index 000000000000..90a456a2a72f --- /dev/null +++ b/tools/bpf/runqslower/.gitignore @@ -0,0 +1 @@ +/.output diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile new file mode 100644 index 000000000000..39edd68afa8e --- /dev/null +++ b/tools/bpf/runqslower/Makefile @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +OUTPUT := .output +CLANG ?= clang +LLC ?= llc +LLVM_STRIP ?= llvm-strip +DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +LIBBPF_SRC := $(abspath ../../lib/bpf) +BPFOBJ := $(OUTPUT)/libbpf.a +BPF_INCLUDE := $(OUTPUT) +INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib) +CFLAGS := -g -Wall + +# Try to detect best kernel BTF source +KERNEL_REL := $(shell uname -r) +VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL) +VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ + $(wildcard $(VMLINUX_BTF_PATHS)))) + +abs_out := $(abspath $(OUTPUT)) +ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; +MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 +endif + +.DELETE_ON_ERROR: + +.PHONY: all clean runqslower +all: runqslower + +runqslower: $(OUTPUT)/runqslower + +clean: + $(call msg,CLEAN) + $(Q)rm -rf $(OUTPUT) runqslower + +$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) + $(call msg,BINARY,$@) + $(Q)$(CC) $(CFLAGS) $^ -lelf -lz -o $@ + +$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ + $(OUTPUT)/runqslower.bpf.o + +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h + +$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) + $(call msg,GEN-SKEL,$@) + $(Q)$(BPFTOOL) gen skeleton $< > $@ + +$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) + $(call msg,BPF,$@) + $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ + -c $(filter %.c,$^) -o $@ && \ + $(LLVM_STRIP) -g $@ + +$(OUTPUT)/%.o: %.c | $(OUTPUT) + $(call msg,CC,$@) + $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ + +$(OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $(OUTPUT) + +$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) + $(call msg,GEN,$@) + $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ + echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \ + "specify its location." >&2; \ + exit 1;\ + fi + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ + +$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ + OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + +$(DEFAULT_BPFTOOL): + $(Q)$(MAKE) $(submake_extras) -C ../bpftool \ + prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c new file mode 100644 index 000000000000..48a39f72fadf --- /dev/null +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "runqslower.h" + +#define TASK_RUNNING 0 + +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK + +const volatile __u64 min_us = 0; +const volatile pid_t targ_pid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, u32); + __type(value, u64); +} start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +/* record enqueue timestamp */ +__always_inline +static int trace_enqueue(u32 tgid, u32 pid) +{ + u64 ts; + + if (!pid || (targ_pid && targ_pid != pid)) + return 0; + + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&start, &pid, &ts, 0); + return 0; +} + +SEC("tp_btf/sched_wakeup") +int handle__sched_wakeup(u64 *ctx) +{ + /* TP_PROTO(struct task_struct *p) */ + struct task_struct *p = (void *)ctx[0]; + + return trace_enqueue(p->tgid, p->pid); +} + +SEC("tp_btf/sched_wakeup_new") +int handle__sched_wakeup_new(u64 *ctx) +{ + /* TP_PROTO(struct task_struct *p) */ + struct task_struct *p = (void *)ctx[0]; + + return trace_enqueue(p->tgid, p->pid); +} + +SEC("tp_btf/sched_switch") +int handle__sched_switch(u64 *ctx) +{ + /* TP_PROTO(bool preempt, struct task_struct *prev, + * struct task_struct *next) + */ + struct task_struct *prev = (struct task_struct *)ctx[1]; + struct task_struct *next = (struct task_struct *)ctx[2]; + struct event event = {}; + u64 *tsp, delta_us; + long state; + u32 pid; + + /* ivcsw: treat like an enqueue event and store timestamp */ + if (prev->state == TASK_RUNNING) + trace_enqueue(prev->tgid, prev->pid); + + pid = next->pid; + + /* fetch timestamp and calculate delta */ + tsp = bpf_map_lookup_elem(&start, &pid); + if (!tsp) + return 0; /* missed enqueue */ + + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + if (min_us && delta_us <= min_us) + return 0; + + event.pid = pid; + event.delta_us = delta_us; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + /* output */ + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + + bpf_map_delete_elem(&start, &pid); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c new file mode 100644 index 000000000000..d89715844952 --- /dev/null +++ b/tools/bpf/runqslower/runqslower.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook +#include <argp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <time.h> +#include <bpf/libbpf.h> +#include <bpf/bpf.h> +#include "runqslower.h" +#include "runqslower.skel.h" + +struct env { + pid_t pid; + __u64 min_us; + bool verbose; +} env = { + .min_us = 10000, +}; + +const char *argp_program_version = "runqslower 0.1"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = +"runqslower Trace long process scheduling delays.\n" +" For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n" +"\n" +"This script traces high scheduling delays between tasks being\n" +"ready to run and them running on CPU after that.\n" +"\n" +"USAGE: runqslower [-p PID] [min_us]\n" +"\n" +"EXAMPLES:\n" +" runqslower # trace run queue latency higher than 10000 us (default)\n" +" runqslower 1000 # trace run queue latency higher than 1000 us\n" +" runqslower -p 123 # trace pid 123 only\n"; + +static const struct argp_option opts[] = { + { "pid", 'p', "PID", 0, "Process PID to trace"}, + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + static int pos_args; + int pid; + long long min_us; + + switch (key) { + case 'v': + env.verbose = true; + break; + case 'p': + errno = 0; + pid = strtol(arg, NULL, 10); + if (errno || pid <= 0) { + fprintf(stderr, "Invalid PID: %s\n", arg); + argp_usage(state); + } + env.pid = pid; + break; + case ARGP_KEY_ARG: + if (pos_args++) { + fprintf(stderr, + "Unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + errno = 0; + min_us = strtoll(arg, NULL, 10); + if (errno || min_us <= 0) { + fprintf(stderr, "Invalid delay (in us): %s\n", arg); + argp_usage(state); + } + env.min_us = min_us; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static int bump_memlock_rlimit(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + const struct event *e = data; + struct tm *tm; + char ts[32]; + time_t t; + + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us); +} + +void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) +{ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); +} + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct perf_buffer_opts pb_opts; + struct perf_buffer *pb = NULL; + struct runqslower_bpf *obj; + int err; + + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + libbpf_set_print(libbpf_print_fn); + + err = bump_memlock_rlimit(); + if (err) { + fprintf(stderr, "failed to increase rlimit: %d", err); + return 1; + } + + obj = runqslower_bpf__open(); + if (!obj) { + fprintf(stderr, "failed to open and/or load BPF object\n"); + return 1; + } + + /* initialize global data (filtering options) */ + obj->rodata->targ_pid = env.pid; + obj->rodata->min_us = env.min_us; + + err = runqslower_bpf__load(obj); + if (err) { + fprintf(stderr, "failed to load BPF object: %d\n", err); + goto cleanup; + } + + err = runqslower_bpf__attach(obj); + if (err) { + fprintf(stderr, "failed to attach BPF programs\n"); + goto cleanup; + } + + printf("Tracing run queue latency higher than %llu us\n", env.min_us); + printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); + + pb_opts.sample_cb = handle_event; + pb_opts.lost_cb = handle_lost_events; + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); + err = libbpf_get_error(pb); + if (err) { + pb = NULL; + fprintf(stderr, "failed to open perf buffer: %d\n", err); + goto cleanup; + } + + while ((err = perf_buffer__poll(pb, 100)) >= 0) + ; + printf("Error polling perf buffer: %d\n", err); + +cleanup: + perf_buffer__free(pb); + runqslower_bpf__destroy(obj); + + return err != 0; +} diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h new file mode 100644 index 000000000000..9db225425e5f --- /dev/null +++ b/tools/bpf/runqslower/runqslower.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __RUNQSLOWER_H +#define __RUNQSLOWER_H + +#define TASK_COMM_LEN 16 + +struct event { + char task[TASK_COMM_LEN]; + __u64 delta_us; + pid_t pid; +}; + +#endif /* __RUNQSLOWER_H */ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index f30a89046aa3..7ac0d8088565 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -197,7 +197,7 @@ $(OUTPUT)test-libcrypto.bin: $(BUILD) -lcrypto $(OUTPUT)test-gtk2.bin: - $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) + $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) -Wno-deprecated-declarations $(OUTPUT)test-gtk2-infobar.bin: $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) diff --git a/tools/build/feature/test-clang.cpp b/tools/build/feature/test-clang.cpp index a2b3f092d2f0..7d87075cd1c5 100644 --- a/tools/build/feature/test-clang.cpp +++ b/tools/build/feature/test-clang.cpp @@ -1,9 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#include "clang/Basic/Version.h" +#if CLANG_VERSION_MAJOR < 8 #include "clang/Basic/VirtualFileSystem.h" +#endif #include "clang/Driver/Driver.h" #include "clang/Frontend/TextDiagnosticPrinter.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/Support/ManagedStatic.h" +#if CLANG_VERSION_MAJOR >= 8 +#include "llvm/Support/VirtualFileSystem.h" +#endif #include "llvm/Support/raw_ostream.h" using namespace clang; diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index f79b23582a1d..7427a5ee761b 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -72,7 +72,7 @@ class BlkgIterator: name = BlkgIterator.blkcg_name(blkcg) path = parent_path + '/' + name if parent_path else name blkg = drgn.Object(prog, 'struct blkcg_gq', - address=radix_tree_lookup(blkcg.blkg_tree, q_id)) + address=radix_tree_lookup(blkcg.blkg_tree.address_of_(), q_id)) if not blkg.address_: return @@ -228,7 +228,7 @@ q_id = None root_iocg = None ioc = None -for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree): +for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()): blkg = drgn.Object(prog, 'struct blkcg_gq', address=ptr) try: if devname == blkg.q.kobj.parent.name.string_().decode('utf-8'): diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index aea2d91ab364..16d629b22c25 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg) error = 0; done: + if (error) + target_fname[0] = '\0'; return error; } @@ -108,15 +110,29 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg) return ret; } +/* + * Reset target_fname to "" in the two below functions for hibernation: if + * the fcopy operation is aborted by hibernation, the daemon should remove the + * partially-copied file; to achieve this, the hv_utils driver always fakes a + * CANCEL_FCOPY message upon suspend, and later when the VM resumes back, + * the daemon calls hv_copy_cancel() to remove the file; if a file is copied + * successfully before suspend, hv_copy_finished() must reset target_fname to + * avoid that the file can be incorrectly removed upon resume, since the faked + * CANCEL_FCOPY message is spurious in this case. + */ static int hv_copy_finished(void) { close(target_fd); + target_fname[0] = '\0'; return 0; } static int hv_copy_cancel(void) { close(target_fd); - unlink(target_fname); + if (strlen(target_fname) > 0) { + unlink(target_fname); + target_fname[0] = '\0'; + } return 0; } @@ -131,7 +147,7 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int fcopy_fd; + int fcopy_fd = -1; int error; int daemonize = 1, long_index = 0, opt; int version = FCOPY_CURRENT_VERSION; @@ -141,7 +157,7 @@ int main(int argc, char *argv[]) struct hv_do_fcopy copy; __u32 kernel_modver; } buffer = { }; - int in_handshake = 1; + int in_handshake; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -170,6 +186,12 @@ int main(int argc, char *argv[]) openlog("HV_FCOPY", 0, LOG_USER); syslog(LOG_INFO, "starting; pid is:%d", getpid()); +reopen_fcopy_fd: + if (fcopy_fd != -1) + close(fcopy_fd); + /* Remove any possible partially-copied file on error */ + hv_copy_cancel(); + in_handshake = 1; fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR); if (fcopy_fd < 0) { @@ -196,7 +218,7 @@ int main(int argc, char *argv[]) len = pread(fcopy_fd, &buffer, sizeof(buffer), 0); if (len < 0) { syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - exit(EXIT_FAILURE); + goto reopen_fcopy_fd; } if (in_handshake) { @@ -231,9 +253,14 @@ int main(int argc, char *argv[]) } + /* + * pwrite() may return an error due to the faked CANCEL_FCOPY + * message upon hibernation. Ignore the error by resetting the + * dev file, i.e. closing and re-opening it. + */ if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) { syslog(LOG_ERR, "pwrite failed: %s", strerror(errno)); - exit(EXIT_FAILURE); + goto reopen_fcopy_fd; } } } diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index e9ef4ca6a655..ee9c1bb2293e 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -76,7 +76,7 @@ enum { DNS }; -static int in_hand_shake = 1; +static int in_hand_shake; static char *os_name = ""; static char *os_major = ""; @@ -1360,7 +1360,7 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int kvp_fd, len; + int kvp_fd = -1, len; int error; struct pollfd pfd; char *p; @@ -1400,14 +1400,6 @@ int main(int argc, char *argv[]) openlog("KVP", 0, LOG_USER); syslog(LOG_INFO, "KVP starting; pid is:%d", getpid()); - kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC); - - if (kvp_fd < 0) { - syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s", - errno, strerror(errno)); - exit(EXIT_FAILURE); - } - /* * Retrieve OS release information. */ @@ -1423,6 +1415,18 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } +reopen_kvp_fd: + if (kvp_fd != -1) + close(kvp_fd); + in_hand_shake = 1; + kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC); + + if (kvp_fd < 0) { + syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s", + errno, strerror(errno)); + exit(EXIT_FAILURE); + } + /* * Register ourselves with the kernel. */ @@ -1456,9 +1460,7 @@ int main(int argc, char *argv[]) if (len != sizeof(struct hv_kvp_msg)) { syslog(LOG_ERR, "read failed; error:%d %s", errno, strerror(errno)); - - close(kvp_fd); - return EXIT_FAILURE; + goto reopen_kvp_fd; } /* @@ -1617,13 +1619,17 @@ int main(int argc, char *argv[]) break; } - /* Send the value back to the kernel. */ + /* + * Send the value back to the kernel. Note: the write() may + * return an error due to hibernation; we can ignore the error + * by resetting the dev file, i.e. closing and re-opening it. + */ kvp_done: len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg)); if (len != sizeof(struct hv_kvp_msg)) { syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); - exit(EXIT_FAILURE); + goto reopen_kvp_fd; } } diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 92902a88f671..dd111870beee 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -28,6 +28,8 @@ #include <stdbool.h> #include <dirent.h> +static bool fs_frozen; + /* Don't use syslog() in the function since that can cause write to disk */ static int vss_do_freeze(char *dir, unsigned int cmd) { @@ -155,18 +157,27 @@ static int vss_operate(int operation) continue; } error |= vss_do_freeze(ent->mnt_dir, cmd); - if (error && operation == VSS_OP_FREEZE) - goto err; + if (operation == VSS_OP_FREEZE) { + if (error) + goto err; + fs_frozen = true; + } } endmntent(mounts); if (root_seen) { error |= vss_do_freeze("/", cmd); - if (error && operation == VSS_OP_FREEZE) - goto err; + if (operation == VSS_OP_FREEZE) { + if (error) + goto err; + fs_frozen = true; + } } + if (operation == VSS_OP_THAW && !error) + fs_frozen = false; + goto out; err: save_errno = errno; @@ -175,6 +186,7 @@ err: endmntent(mounts); } vss_operate(VSS_OP_THAW); + fs_frozen = false; /* Call syslog after we thaw all filesystems */ if (ent) syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s", @@ -196,13 +208,13 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int vss_fd, len; + int vss_fd = -1, len; int error; struct pollfd pfd; int op; struct hv_vss_msg vss_msg[1]; int daemonize = 1, long_index = 0, opt; - int in_handshake = 1; + int in_handshake; __u32 kernel_modver; static struct option long_options[] = { @@ -232,6 +244,18 @@ int main(int argc, char *argv[]) openlog("Hyper-V VSS", 0, LOG_USER); syslog(LOG_INFO, "VSS starting; pid is:%d", getpid()); +reopen_vss_fd: + if (vss_fd != -1) + close(vss_fd); + if (fs_frozen) { + if (vss_operate(VSS_OP_THAW) || fs_frozen) { + syslog(LOG_ERR, "failed to thaw file system: err=%d", + errno); + exit(EXIT_FAILURE); + } + } + + in_handshake = 1; vss_fd = open("/dev/vmbus/hv_vss", O_RDWR); if (vss_fd < 0) { syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s", @@ -284,8 +308,7 @@ int main(int argc, char *argv[]) if (len != sizeof(struct hv_vss_msg)) { syslog(LOG_ERR, "read failed; error:%d %s", errno, strerror(errno)); - close(vss_fd); - return EXIT_FAILURE; + goto reopen_vss_fd; } op = vss_msg->vss_hdr.operation; @@ -312,14 +335,18 @@ int main(int argc, char *argv[]) default: syslog(LOG_ERR, "Illegal op:%d\n", op); } + + /* + * The write() may return an error due to the faked VSS_OP_THAW + * message upon hibernation. Ignore the error by resetting the + * dev file, i.e. closing and re-opening it. + */ vss_msg->error = error; len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg)); if (len != sizeof(struct hv_vss_msg)) { syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); - - if (op == VSS_OP_FREEZE) - vss_operate(VSS_OP_THAW); + goto reopen_vss_fd; } } diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 05dca5c203f3..477a1cae513f 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -15,6 +15,8 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits); void bitmap_clear(unsigned long *map, unsigned int start, int len); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) @@ -124,6 +126,15 @@ static inline unsigned long *bitmap_alloc(int nbits) } /* + * bitmap_free - Free bitmap + * @bitmap: pointer to bitmap + */ +static inline void bitmap_free(unsigned long *bitmap) +{ + free(bitmap); +} + +/* * bitmap_scnprintf - print bitmap list into buffer * @bitmap: bitmap * @nbits: size of bitmap @@ -148,4 +159,23 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, return __bitmap_and(dst, src1, src2, nbits); } +#ifdef __LITTLE_ENDIAN +#define BITMAP_MEM_ALIGNMENT 8 +#else +#define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) +#endif +#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +static inline int bitmap_equal(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); + if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + return !memcmp(src1, src2, nbits / 8); + return __bitmap_equal(src1, src2, nbits); +} + #endif /* _PERF_BITOPS_H */ diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 140c8362f113..5fca38fe1ba8 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -14,10 +14,11 @@ #include <linux/bits.h> #include <linux/compiler.h> -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) -#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) -#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) +#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 980cb9266718..5e9e781905ed 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -17,7 +17,15 @@ int strtobool(const char *s, bool *res); * However uClibc headers also define __GLIBC__ hence the hack below */ #if defined(__GLIBC__) && !defined(__UCLIBC__) +// pragma diagnostic was introduced in gcc 4.6 +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif extern size_t strlcpy(char *dest, const char *src, size_t size); +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic pop +#endif #endif char *str_error_r(int errnum, char *buf, size_t buflen); diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index c160a5354eb6..f94f65d429be 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -11,6 +11,8 @@ #define PROT_WRITE 0x2 /* page can be written */ #define PROT_EXEC 0x4 /* page can be executed */ #define PROT_SEM 0x8 /* page may be used for atomic ops */ +/* 0x10 reserved for arch-specific use */ +/* 0x20 reserved for arch-specific use */ #define PROT_NONE 0x0 /* page can not be accessed */ #define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ #define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 1fc8faa6e973..3a3201e4618e 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -851,8 +851,13 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open) __SYSCALL(__NR_clone3, sys_clone3) #endif +#define __NR_openat2 437 +__SYSCALL(__NR_openat2, sys_openat2) +#define __NR_pidfd_getfd 438 +__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) + #undef __NR_syscalls -#define __NR_syscalls 436 +#define __NR_syscalls 439 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h index 13a58531e6fa..39acc149d843 100644 --- a/tools/include/uapi/asm/bpf_perf_event.h +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -2,6 +2,8 @@ #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" #elif defined(__s390__) #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" +#elif defined(__riscv) +#include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h" #else #include <uapi/asm-generic/bpf_perf_event.h> #endif diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index ce3c5945a1c4..637189ec1ab9 100644 --- a/tools/include/uapi/asm/errno.h +++ b/tools/include/uapi/asm/errno.h @@ -1,18 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #if defined(__i386__) || defined(__x86_64__) -#include "../../arch/x86/include/uapi/asm/errno.h" +#include "../../../arch/x86/include/uapi/asm/errno.h" #elif defined(__powerpc__) -#include "../../arch/powerpc/include/uapi/asm/errno.h" +#include "../../../arch/powerpc/include/uapi/asm/errno.h" #elif defined(__sparc__) -#include "../../arch/sparc/include/uapi/asm/errno.h" +#include "../../../arch/sparc/include/uapi/asm/errno.h" #elif defined(__alpha__) -#include "../../arch/alpha/include/uapi/asm/errno.h" +#include "../../../arch/alpha/include/uapi/asm/errno.h" #elif defined(__mips__) -#include "../../arch/mips/include/uapi/asm/errno.h" +#include "../../../arch/mips/include/uapi/asm/errno.h" #elif defined(__ia64__) -#include "../../arch/ia64/include/uapi/asm/errno.h" +#include "../../../arch/ia64/include/uapi/asm/errno.h" #elif defined(__xtensa__) -#include "../../arch/xtensa/include/uapi/asm/errno.h" +#include "../../../arch/xtensa/include/uapi/asm/errno.h" #else #include <asm-generic/errno.h> #endif diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 5400d7e057f1..829c0a48577f 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -395,6 +395,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) #define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset) #define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) #define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) #define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) @@ -793,6 +794,37 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +struct drm_i915_gem_mmap_offset { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; + + /** + * Flags for extended behaviour. + * + * It is mandatory that one of the MMAP_OFFSET types + * (GTT, WC, WB, UC, etc) should be included. + */ + __u64 flags; +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 + + /* + * Zero-terminated chain of extensions. + * + * No current extensions defined; mbz. + */ + __u64 extensions; +}; + struct drm_i915_gem_set_domain { /** Handle for the object */ __u32 handle; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index dbbcf0b02970..22f235260a3a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -107,6 +107,10 @@ enum bpf_cmd { BPF_MAP_LOOKUP_AND_DELETE_ELEM, BPF_MAP_FREEZE, BPF_BTF_GET_NEXT_ID, + BPF_MAP_LOOKUP_BATCH, + BPF_MAP_LOOKUP_AND_DELETE_BATCH, + BPF_MAP_UPDATE_BATCH, + BPF_MAP_DELETE_BATCH, }; enum bpf_map_type { @@ -136,6 +140,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, BPF_MAP_TYPE_DEVMAP_HASH, + BPF_MAP_TYPE_STRUCT_OPS, }; /* Note that tracing related programs such as @@ -174,6 +179,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_PROG_TYPE_TRACING, + BPF_PROG_TYPE_STRUCT_OPS, + BPF_PROG_TYPE_EXT, }; enum bpf_attach_type { @@ -231,6 +238,11 @@ enum bpf_attach_type { * When children program makes decision (like picking TCP CA or sock bind) * parent program has a chance to override it. * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. * A cgroup with NONE doesn't allow any programs in sub-cgroups. * Ex1: @@ -249,6 +261,7 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +#define BPF_F_REPLACE (1U << 2) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -351,7 +364,12 @@ enum bpf_attach_type { /* Enable memory-mapping BPF map */ #define BPF_F_MMAPABLE (1U << 10) -/* flags for BPF_PROG_QUERY */ +/* Flags for BPF_PROG_QUERY. */ + +/* Query effective (directly attached + inherited from ancestor cgroups) + * programs that will be executed for events within a cgroup. + * attach_flags with this flag are returned only for directly attached programs. + */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) enum bpf_stack_build_id_status { @@ -391,6 +409,10 @@ union bpf_attr { __u32 btf_fd; /* fd pointing to a BTF type data */ __u32 btf_key_type_id; /* BTF type_id of the key */ __u32 btf_value_type_id; /* BTF type_id of the value */ + __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- + * struct stored as the + * map value + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -403,6 +425,23 @@ union bpf_attr { __u64 flags; }; + struct { /* struct used by BPF_MAP_*_BATCH commands */ + __aligned_u64 in_batch; /* start batch, + * NULL to start from beginning + */ + __aligned_u64 out_batch; /* output: next start batch */ + __aligned_u64 keys; + __aligned_u64 values; + __u32 count; /* input/output: + * input: # of key/value + * elements + * output: # of filled elements + */ + __u32 map_fd; + __u64 elem_flags; + __u64 flags; + } batch; + struct { /* anonymous struct used by BPF_PROG_LOAD command */ __u32 prog_type; /* one of enum bpf_prog_type */ __u32 insn_cnt; @@ -442,6 +481,10 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ @@ -1002,9 +1045,9 @@ union bpf_attr { * supports redirection to the egress interface, and accepts no * flag at all. * - * The same effect can be attained with the more generic - * **bpf_redirect_map**\ (), which requires specific maps to be - * used but offers better performance. + * The same effect can also be attained with the more generic + * **bpf_redirect_map**\ (), which uses a BPF map to store the + * redirect target instead of providing it directly to the helper. * Return * For XDP, the helper returns **XDP_REDIRECT** on success or * **XDP_ABORTED** on error. For other program types, the values @@ -1568,13 +1611,11 @@ union bpf_attr { * the caller. Any higher bits in the *flags* argument must be * unset. * - * When used to redirect packets to net devices, this helper - * provides a high performance increase over **bpf_redirect**\ (). - * This is due to various implementation details of the underlying - * mechanisms, one of which is the fact that **bpf_redirect_map**\ - * () tries to send packet as a "bulk" to the device. + * See also bpf_redirect(), which only supports redirecting to an + * ifindex, but doesn't require a map to do so. * Return - * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error. + * **XDP_REDIRECT** on success, or the value of the two lower bits + * of the **flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description @@ -2693,7 +2734,8 @@ union bpf_attr { * * int bpf_send_signal(u32 sig) * Description - * Send signal *sig* to the current task. + * Send signal *sig* to the process of the current task. + * The signal may be delivered to any of this process's threads. * Return * 0 on success or successfully queued. * @@ -2821,6 +2863,33 @@ union bpf_attr { * Return * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. + * + * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * Description + * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock. + * *rcv_nxt* is the ack_seq to be sent out. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_send_signal_thread(u32 sig) + * Description + * Send signal *sig* to the thread corresponding to the current task. + * Return + * 0 on success or successfully queued. + * + * **-EBUSY** if work queue under nmi is full. + * + * **-EINVAL** if *sig* is invalid. + * + * **-EPERM** if no permission to send the *sig*. + * + * **-EAGAIN** if bpf program can try again. + * + * u64 bpf_jiffies64(void) + * Description + * Obtain the 64bit jiffies + * Return + * The 64 bit jiffies */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2938,7 +3007,10 @@ union bpf_attr { FN(probe_read_user), \ FN(probe_read_kernel), \ FN(probe_read_user_str), \ - FN(probe_read_kernel_str), + FN(probe_read_kernel_str), \ + FN(tcp_send_ack), \ + FN(send_signal_thread), \ + FN(jiffies64), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3339,7 +3411,7 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; - __u32 :32; + __u32 btf_vmlinux_value_type_id; __u64 netns_dev; __u64 netns_ino; __u32 btf_id; diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 63ae4a39e58b..5a667107ad2c 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -22,9 +22,9 @@ struct btf_header { }; /* Max # of type identifier */ -#define BTF_MAX_TYPE 0x0000ffff +#define BTF_MAX_TYPE 0x000fffff /* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x0000ffff +#define BTF_MAX_NAME_OFFSET 0x00ffffff /* Max # of struct/union/enum members or func args */ #define BTF_MAX_VLEN 0xffff @@ -142,7 +142,14 @@ struct btf_param { enum { BTF_VAR_STATIC = 0, - BTF_VAR_GLOBAL_ALLOCATED, + BTF_VAR_GLOBAL_ALLOCATED = 1, + BTF_VAR_GLOBAL_EXTERN = 2, +}; + +enum btf_func_linkage { + BTF_FUNC_STATIC = 0, + BTF_FUNC_GLOBAL = 1, + BTF_FUNC_EXTERN = 2, }; /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 1f97b33c840e..ca88b7bce553 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -3,6 +3,7 @@ #define _UAPI_LINUX_FCNTL_H #include <asm/fcntl.h> +#include <linux/openat2.h> #define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0) #define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1) @@ -100,5 +101,4 @@ #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ - #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 1beb174ad950..0d8a6f47711c 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -8,6 +8,7 @@ #ifndef _UAPI_LINUX_FSCRYPT_H #define _UAPI_LINUX_FSCRYPT_H +#include <linux/ioctl.h> #include <linux/types.h> /* Encryption policy flags */ @@ -109,11 +110,22 @@ struct fscrypt_key_specifier { } u; }; +/* + * Payload of Linux keyring key of type "fscrypt-provisioning", referenced by + * fscrypt_add_key_arg::key_id as an alternative to fscrypt_add_key_arg::raw. + */ +struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; +}; + /* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[8]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8aec8769d944..024af2d1d0af 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -169,6 +169,7 @@ enum { IFLA_MAX_MTU, IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ + IFLA_PERM_ADDRESS, __IFLA_MAX }; @@ -485,6 +486,13 @@ enum macsec_validation_type { MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, }; +enum macsec_offload { + MACSEC_OFFLOAD_OFF = 0, + MACSEC_OFFLOAD_PHY = 1, + __MACSEC_OFFLOAD_END, + MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, +}; + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e7ad9d350a28..1521073b6348 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -76,6 +76,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_MPTCP = 262, /* Multipath TCP connection */ +#define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX }; #endif diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f0a16b4adbbd..4b95f9a31a2f 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1009,6 +1009,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 #define KVM_CAP_ARM_NISV_TO_USER 177 #define KVM_CAP_ARM_INJECT_EXT_DABT 178 +#define KVM_CAP_S390_VCPU_RESETS 179 #ifdef KVM_CAP_IRQ_ROUTING @@ -1473,6 +1474,10 @@ struct kvm_enc_region { /* Available with KVM_CAP_ARM_SVE */ #define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) +/* Available with KVM_CAP_S390_VCPU_RESETS */ +#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) +#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h new file mode 100644 index 000000000000..58b1eb711360 --- /dev/null +++ b/tools/include/uapi/linux/openat2.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_OPENAT2_H +#define _UAPI_LINUX_OPENAT2_H + +#include <linux/types.h> + +/* + * Arguments for how openat2(2) should open the target path. If only @flags and + * @mode are non-zero, then openat2(2) operates very similarly to openat(2). + * + * However, unlike openat(2), unknown or invalid bits in @flags result in + * -EINVAL rather than being silently ignored. @mode must be zero unless one of + * {O_CREAT, O_TMPFILE} are set. + * + * @flags: O_* flags. + * @mode: O_CREAT/O_TMPFILE file mode. + * @resolve: RESOLVE_* flags. + */ +struct open_how { + __u64 flags; + __u64 mode; + __u64 resolve; +}; + +/* how->resolve flags for openat2(2). */ +#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings + (includes bind-mounts). */ +#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style + "magic-links". */ +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks + (implies OEXT_NO_MAGICLINKS) */ +#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like + "..", symlinks, and absolute + paths which escape the dirfd. */ +#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." + be scoped inside the dirfd + (similar to chroot(2)). */ + +#endif /* _UAPI_LINUX_OPENAT2_H */ diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 7da1b37b27aa..07b4f8131e36 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -234,4 +234,8 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* Control reclaim behavior when allocating memory */ +#define PR_SET_IO_FLUSHER 57 +#define PR_GET_IO_FLUSHER 58 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h index 4a0217832464..2e3bc22c6f20 100644 --- a/tools/include/uapi/linux/sched.h +++ b/tools/include/uapi/linux/sched.h @@ -36,6 +36,12 @@ /* Flags for the clone3() syscall. */ #define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ +/* + * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 + * syscalls only: + */ +#define CLONE_NEWTIME 0x00000080 /* New time namespace */ + #ifndef __ASSEMBLY__ /** * struct clone_args - arguments for the clone3 syscall diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index df1153cea0b7..535a7229e1d9 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -26,7 +26,9 @@ #if defined(__KERNEL__) || defined(__linux__) #include <linux/types.h> +#include <asm/byteorder.h> #else +#include <endian.h> #include <sys/ioctl.h> #endif @@ -154,7 +156,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 14) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 15) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -301,7 +303,9 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ - +#if (__BITS_PER_LONG == 32 && defined(__USE_TIME_BITS64)) || defined __KERNEL__ +#define __SND_STRUCT_TIME64 +#endif typedef int __bitwise snd_pcm_state_t; #define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */ @@ -317,8 +321,17 @@ typedef int __bitwise snd_pcm_state_t; enum { SNDRV_PCM_MMAP_OFFSET_DATA = 0x00000000, - SNDRV_PCM_MMAP_OFFSET_STATUS = 0x80000000, - SNDRV_PCM_MMAP_OFFSET_CONTROL = 0x81000000, + SNDRV_PCM_MMAP_OFFSET_STATUS_OLD = 0x80000000, + SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD = 0x81000000, + SNDRV_PCM_MMAP_OFFSET_STATUS_NEW = 0x82000000, + SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW = 0x83000000, +#ifdef __SND_STRUCT_TIME64 + SNDRV_PCM_MMAP_OFFSET_STATUS = SNDRV_PCM_MMAP_OFFSET_STATUS_NEW, + SNDRV_PCM_MMAP_OFFSET_CONTROL = SNDRV_PCM_MMAP_OFFSET_CONTROL_NEW, +#else + SNDRV_PCM_MMAP_OFFSET_STATUS = SNDRV_PCM_MMAP_OFFSET_STATUS_OLD, + SNDRV_PCM_MMAP_OFFSET_CONTROL = SNDRV_PCM_MMAP_OFFSET_CONTROL_OLD, +#endif }; union snd_pcm_sync_id { @@ -456,8 +469,13 @@ enum { SNDRV_PCM_AUDIO_TSTAMP_TYPE_LAST = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED }; +#ifndef __KERNEL__ +/* explicit padding avoids incompatibility between i386 and x86-64 */ +typedef struct { unsigned char pad[sizeof(time_t) - sizeof(int)]; } __time_pad; + struct snd_pcm_status { snd_pcm_state_t state; /* stream state */ + __time_pad pad1; /* align to timespec */ struct timespec trigger_tstamp; /* time when stream was started/stopped/paused */ struct timespec tstamp; /* reference timestamp */ snd_pcm_uframes_t appl_ptr; /* appl ptr */ @@ -473,17 +491,48 @@ struct snd_pcm_status { __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ unsigned char reserved[52-2*sizeof(struct timespec)]; /* must be filled with zero */ }; +#endif + +/* + * For mmap operations, we need the 64-bit layout, both for compat mode, + * and for y2038 compatibility. For 64-bit applications, the two definitions + * are identical, so we keep the traditional version. + */ +#ifdef __SND_STRUCT_TIME64 +#define __snd_pcm_mmap_status64 snd_pcm_mmap_status +#define __snd_pcm_mmap_control64 snd_pcm_mmap_control +#define __snd_pcm_sync_ptr64 snd_pcm_sync_ptr +#ifdef __KERNEL__ +#define __snd_timespec64 __kernel_timespec +#else +#define __snd_timespec64 timespec +#endif +struct __snd_timespec { + __s32 tv_sec; + __s32 tv_nsec; +}; +#else +#define __snd_pcm_mmap_status snd_pcm_mmap_status +#define __snd_pcm_mmap_control snd_pcm_mmap_control +#define __snd_pcm_sync_ptr snd_pcm_sync_ptr +#define __snd_timespec timespec +struct __snd_timespec64 { + __s64 tv_sec; + __s64 tv_nsec; +}; -struct snd_pcm_mmap_status { +#endif + +struct __snd_pcm_mmap_status { snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */ int pad1; /* Needed for 64 bit alignment */ snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */ - struct timespec tstamp; /* Timestamp */ + struct __snd_timespec tstamp; /* Timestamp */ snd_pcm_state_t suspended_state; /* RO: suspended stream state */ - struct timespec audio_tstamp; /* from sample counter or wall clock */ + struct __snd_timespec audio_tstamp; /* from sample counter or wall clock */ }; -struct snd_pcm_mmap_control { +struct __snd_pcm_mmap_control { snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */ snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */ }; @@ -492,14 +541,59 @@ struct snd_pcm_mmap_control { #define SNDRV_PCM_SYNC_PTR_APPL (1<<1) /* get appl_ptr from driver (r/w op) */ #define SNDRV_PCM_SYNC_PTR_AVAIL_MIN (1<<2) /* get avail_min from driver */ -struct snd_pcm_sync_ptr { +struct __snd_pcm_sync_ptr { unsigned int flags; union { - struct snd_pcm_mmap_status status; + struct __snd_pcm_mmap_status status; + unsigned char reserved[64]; + } s; + union { + struct __snd_pcm_mmap_control control; + unsigned char reserved[64]; + } c; +}; + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN) +typedef char __pad_before_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)]; +typedef char __pad_after_uframe[0]; +#endif + +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) +typedef char __pad_before_uframe[0]; +typedef char __pad_after_uframe[sizeof(__u64) - sizeof(snd_pcm_uframes_t)]; +#endif + +struct __snd_pcm_mmap_status64 { + snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */ + __u32 pad1; /* Needed for 64 bit alignment */ + __pad_before_uframe __pad1; + snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */ + __pad_after_uframe __pad2; + struct __snd_timespec64 tstamp; /* Timestamp */ + snd_pcm_state_t suspended_state;/* RO: suspended stream state */ + __u32 pad3; /* Needed for 64 bit alignment */ + struct __snd_timespec64 audio_tstamp; /* sample counter or wall clock */ +}; + +struct __snd_pcm_mmap_control64 { + __pad_before_uframe __pad1; + snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */ + __pad_before_uframe __pad2; + + __pad_before_uframe __pad3; + snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */ + __pad_after_uframe __pad4; +}; + +struct __snd_pcm_sync_ptr64 { + __u32 flags; + __u32 pad1; + union { + struct __snd_pcm_mmap_status64 status; unsigned char reserved[64]; } s; union { - struct snd_pcm_mmap_control control; + struct __snd_pcm_mmap_control64 control; unsigned char reserved[64]; } c; }; @@ -584,6 +678,8 @@ enum { #define SNDRV_PCM_IOCTL_STATUS _IOR('A', 0x20, struct snd_pcm_status) #define SNDRV_PCM_IOCTL_DELAY _IOR('A', 0x21, snd_pcm_sframes_t) #define SNDRV_PCM_IOCTL_HWSYNC _IO('A', 0x22) +#define __SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct __snd_pcm_sync_ptr) +#define __SNDRV_PCM_IOCTL_SYNC_PTR64 _IOWR('A', 0x23, struct __snd_pcm_sync_ptr64) #define SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct snd_pcm_sync_ptr) #define SNDRV_PCM_IOCTL_STATUS_EXT _IOWR('A', 0x24, struct snd_pcm_status) #define SNDRV_PCM_IOCTL_CHANNEL_INFO _IOR('A', 0x32, struct snd_pcm_channel_info) @@ -614,7 +710,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 0) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 1) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -648,13 +744,16 @@ struct snd_rawmidi_params { unsigned char reserved[16]; /* reserved for future use */ }; +#ifndef __KERNEL__ struct snd_rawmidi_status { int stream; + __time_pad pad1; struct timespec tstamp; /* Timestamp */ size_t avail; /* available bytes */ size_t xruns; /* count of overruns since last status (in bytes) */ unsigned char reserved[16]; /* reserved for future use */ }; +#endif #define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) #define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) @@ -667,7 +766,7 @@ struct snd_rawmidi_status { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 6) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) enum { SNDRV_TIMER_CLASS_NONE = -1, @@ -761,6 +860,7 @@ struct snd_timer_params { unsigned char reserved[60]; /* reserved */ }; +#ifndef __KERNEL__ struct snd_timer_status { struct timespec tstamp; /* Timestamp - last update */ unsigned int resolution; /* current period resolution in ns */ @@ -769,10 +869,11 @@ struct snd_timer_status { unsigned int queue; /* used queue size */ unsigned char reserved[64]; /* reserved */ }; +#endif #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) -#define SNDRV_TIMER_IOCTL_TREAD _IOW('T', 0x02, int) +#define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int) #define SNDRV_TIMER_IOCTL_GINFO _IOWR('T', 0x03, struct snd_timer_ginfo) #define SNDRV_TIMER_IOCTL_GPARAMS _IOW('T', 0x04, struct snd_timer_gparams) #define SNDRV_TIMER_IOCTL_GSTATUS _IOWR('T', 0x05, struct snd_timer_gstatus) @@ -785,6 +886,15 @@ struct snd_timer_status { #define SNDRV_TIMER_IOCTL_STOP _IO('T', 0xa1) #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) +#define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int) + +#if __BITS_PER_LONG == 64 +#define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD +#else +#define SNDRV_TIMER_IOCTL_TREAD ((sizeof(__kernel_long_t) >= sizeof(time_t)) ? \ + SNDRV_TIMER_IOCTL_TREAD_OLD : \ + SNDRV_TIMER_IOCTL_TREAD64) +#endif struct snd_timer_read { unsigned int resolution; @@ -810,11 +920,15 @@ enum { SNDRV_TIMER_EVENT_MRESUME = SNDRV_TIMER_EVENT_RESUME + 10, }; +#ifndef __KERNEL__ struct snd_timer_tread { int event; + __time_pad pad1; struct timespec tstamp; unsigned int val; + __time_pad pad2; }; +#endif /**************************************************************************** * * @@ -822,7 +936,7 @@ struct snd_timer_tread { * * ****************************************************************************/ -#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) +#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) struct snd_ctl_card_info { int card; /* card number */ @@ -860,7 +974,7 @@ typedef int __bitwise snd_ctl_elem_iface_t; #define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1) #define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE) #define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */ -#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP (1<<3) /* when was control changed */ +// (1 << 3) is unused. #define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */ #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE) @@ -926,11 +1040,7 @@ struct snd_ctl_elem_info { } enumerated; unsigned char reserved[128]; } value; - union { - unsigned short d[4]; /* dimensions */ - unsigned short *d_ptr; /* indirect - obsoleted */ - } dimen; - unsigned char reserved[64-4*sizeof(unsigned short)]; + unsigned char reserved[64]; }; struct snd_ctl_elem_value { @@ -955,8 +1065,7 @@ struct snd_ctl_elem_value { } bytes; struct snd_aes_iec958 iec958; } value; /* RO */ - struct timespec tstamp; - unsigned char reserved[128-sizeof(struct timespec)]; + unsigned char reserved[128]; }; struct snd_ctl_tlv { diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index ad1b9e646c49..4cf93110c259 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -270,6 +270,7 @@ class ArchX86(Arch): def __init__(self, exit_reasons): self.sc_perf_evt_open = 298 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'exit_reason' self.exit_reasons = exit_reasons def debugfs_is_child(self, field): @@ -289,6 +290,7 @@ class ArchPPC(Arch): # numbers depend on the wordsize. char_ptr_size = ctypes.sizeof(ctypes.c_char_p) self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + self.exit_reason_field = 'exit_nr' self.exit_reasons = {} def debugfs_is_child(self, field): @@ -300,6 +302,7 @@ class ArchA64(Arch): def __init__(self): self.sc_perf_evt_open = 241 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'esr_ec' self.exit_reasons = AARCH64_EXIT_REASONS def debugfs_is_child(self, field): @@ -311,6 +314,7 @@ class ArchS390(Arch): def __init__(self): self.sc_perf_evt_open = 331 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = None self.exit_reasons = None def debugfs_is_child(self, field): @@ -541,8 +545,8 @@ class TracepointProvider(Provider): """ filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) - if ARCH.exit_reasons: - filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + if ARCH.exit_reason_field and ARCH.exit_reasons: + filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons) return filters def _get_available_fields(self): diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 11b3885e833e..027b18f7ed8c 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs) size_t name_len = strlen(fs->name); /* name + "_PATH" + '\0' */ char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); mem_toupper(upper_name, name_len); strcpy(&upper_name[name_len], "_PATH"); @@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; - strncpy(fs->path, override_path, sizeof(fs->path)); + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; return true; } diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 38494782be06..5043747ef6c5 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -71,3 +71,18 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, BITMAP_LAST_WORD_MASK(bits)); return result != 0; } + +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] != bitmap2[k]) + return 0; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 97830e46d1a0..aee7f1a83c77 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,8 +56,8 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray -FEATURE_DISPLAY = libelf bpf +FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray +FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) @@ -148,6 +148,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -160,7 +161,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -178,11 +179,11 @@ $(BPF_IN_SHARED): force elfdep bpfdep $(BPF_HELPER_DEFS) echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) @@ -190,7 +191,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(QUIET_LINK)$(CC) $(LDFLAGS) \ --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) @@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so "versioned in $(VERSION_SCRIPT)." >&2; \ readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -250,6 +252,7 @@ install_headers: $(BPF_HELPER_DEFS) $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \ @@ -270,20 +273,24 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ - *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ - $(SHARED_OBJDIR) $(STATIC_OBJDIR) + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ + *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ + $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ + $(addprefix $(OUTPUT), \ + *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -PHONY += force elfdep bpfdep cscope tags +PHONY += force elfdep zdep bpfdep cscope tags force: elfdep: @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi +zdep: + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi + bpfdep: @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 98596e15390f..c6dafe563176 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,6 +32,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -95,7 +98,11 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.btf_key_type_id = create_attr->btf_key_type_id; attr.btf_value_type_id = create_attr->btf_value_type_id; attr.map_ifindex = create_attr->map_ifindex; - attr.inner_map_fd = create_attr->inner_map_fd; + if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) + attr.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + attr.inner_map_fd = create_attr->inner_map_fd; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } @@ -228,7 +235,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_TRACING) { + if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) { + attr.attach_btf_id = load_attr->attach_btf_id; + } else if (attr.prog_type == BPF_PROG_TYPE_TRACING || + attr.prog_type == BPF_PROG_TYPE_EXT) { attr.attach_btf_id = load_attr->attach_btf_id; attr.attach_prog_fd = load_attr->attach_prog_fd; } else { @@ -443,6 +453,64 @@ int bpf_map_freeze(int fd) return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); } +static int bpf_map_batch_common(int cmd, int fd, void *in_batch, + void *out_batch, void *keys, void *values, + __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + union bpf_attr attr; + int ret; + + if (!OPTS_VALID(opts, bpf_map_batch_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.batch.map_fd = fd; + attr.batch.in_batch = ptr_to_u64(in_batch); + attr.batch.out_batch = ptr_to_u64(out_batch); + attr.batch.keys = ptr_to_u64(keys); + attr.batch.values = ptr_to_u64(values); + attr.batch.count = *count; + attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); + attr.batch.flags = OPTS_GET(opts, flags, 0); + + ret = sys_bpf(cmd, &attr, sizeof(attr)); + *count = attr.batch.count; + + return ret; +} + +int bpf_map_delete_batch(int fd, void *keys, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, + NULL, keys, NULL, count, opts); +} + +int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch, + out_batch, keys, values, count, opts); +} + +int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH, + fd, in_batch, out_batch, keys, values, + count, opts); +} + +int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, + keys, values, count, opts); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; @@ -467,13 +535,28 @@ int bpf_obj_get(const char *pathname) int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, unsigned int flags) { + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts, + .flags = flags, + ); + + return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); +} + +int bpf_prog_attach_xattr(int prog_fd, int target_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts) +{ union bpf_attr attr; + if (!OPTS_VALID(opts, bpf_prog_attach_opts)) + return -EINVAL; + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; - attr.attach_flags = flags; + attr.attach_flags = OPTS_GET(opts, flags, 0); + attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 3c791fa8e68e..b976e77316cc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -28,14 +28,12 @@ #include <stddef.h> #include <stdint.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -48,7 +46,10 @@ struct bpf_create_map_attr { __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 map_ifindex; - __u32 inner_map_fd; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; }; LIBBPF_API int @@ -126,10 +127,43 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); + +struct bpf_map_batch_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u64 elem_flags; + __u64 flags; +}; +#define bpf_map_batch_opts__last_field flags + +LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, + void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + __u32 *count, + const struct bpf_map_batch_opts *opts); + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); + +struct bpf_prog_attach_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + unsigned int flags; + int replace_prog_fd; +}; +#define bpf_prog_attach_opts__last_field replace_prog_fd + LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 0c7d28292898..f69cc208778a 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -25,6 +25,9 @@ #ifndef __always_inline #define __always_inline __attribute__((always_inline)) #endif +#ifndef __weak +#define __weak __attribute__((weak)) +#endif /* * Helper structure used by eBPF C program @@ -44,4 +47,12 @@ enum libbpf_pin_type { LIBBPF_PIN_BY_NAME, }; +enum libbpf_tristate { + TRI_NO = 0, + TRI_YES = 1, + TRI_MODULE = 2, +}; + +#define __kconfig __attribute__((section(".kconfig"))) + #endif diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 3ed1a27b5f7c..bafca49cb1e6 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -8,6 +8,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + struct bpf_prog_linfo { void *raw_linfo; void *raw_jited_linfo; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 88efa2bb7137..3d1c25fc97ae 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -8,6 +8,10 @@ #include <fcntl.h> #include <unistd.h> #include <errno.h> +#include <sys/utsname.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> #include <gelf.h> @@ -17,8 +21,11 @@ #include "libbpf_internal.h" #include "hashmap.h" -#define BTF_MAX_NR_TYPES 0x7fffffff -#define BTF_MAX_STR_OFFSET 0x7fffffff +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +#define BTF_MAX_NR_TYPES 0x7fffffffU +#define BTF_MAX_STR_OFFSET 0x7fffffffU static struct btf_type btf_void; @@ -50,7 +57,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) if (btf->types_size == BTF_MAX_NR_TYPES) return -E2BIG; - expand_by = max(btf->types_size >> 2, 16); + expand_by = max(btf->types_size >> 2, 16U); new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); new_types = realloc(btf->types, sizeof(*new_types) * new_size); @@ -278,6 +285,45 @@ done: return nelems * size; } +int btf__align_of(const struct btf *btf, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + __u16 kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + return min(sizeof(void *), (size_t)t->size); + case BTF_KIND_PTR: + return sizeof(void *); + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf__align_of(btf, t->type); + case BTF_KIND_ARRAY: + return btf__align_of(btf, btf_array(t)->type); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); + int i, max_align = 1, align; + + for (i = 0; i < vlen; i++, m++) { + align = btf__align_of(btf, m->type); + if (align <= 0) + return align; + max_align = max(max_align, align); + } + + return max_align; + } + default: + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); + return 0; + } +} + int btf__resolve_type(const struct btf *btf, __u32 type_id) { const struct btf_type *t; @@ -539,6 +585,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, return -ENOENT; } + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + ret = bpf_object__section_size(obj, name, &size); if (ret || !size || (t->size && t->size != size)) { pr_debug("Invalid size for section %s: %u bytes\n", name, size); @@ -575,7 +627,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, vsi->offset = off; } - qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); return 0; } @@ -1352,7 +1405,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d, if (d->hypot_cnt == d->hypot_cap) { __u32 *new_list; - d->hypot_cap += max(16, d->hypot_cap / 2); + d->hypot_cap += max((size_t)16, d->hypot_cap / 2); new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); if (!new_list) return -ENOMEM; @@ -1648,7 +1701,7 @@ static int btf_dedup_strings(struct btf_dedup *d) if (strs.cnt + 1 > strs.cap) { struct btf_str_ptr *new_ptrs; - strs.cap += max(strs.cnt / 2, 16); + strs.cap += max(strs.cnt / 2, 16U); new_ptrs = realloc(strs.ptrs, sizeof(strs.ptrs[0]) * strs.cap); if (!new_ptrs) { @@ -2882,3 +2935,89 @@ static int btf_dedup_remap_types(struct btf_dedup *d) } return 0; } + +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + +/* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ +struct btf *libbpf_find_kernel_btf(void) +{ + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warn("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index d9ac73a02cde..70c1b7ec2bd0 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -8,14 +8,12 @@ #include <linux/btf.h> #include <linux/types.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" #define MAPS_ELF_SEC ".maps" @@ -79,6 +77,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); @@ -103,6 +102,8 @@ LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; @@ -127,6 +128,28 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); +struct btf_dump_emit_type_decl_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* optional field name for type declaration, e.g.: + * - struct my_struct <FNAME> + * - void (*<FNAME>)(int) + * - char (*<FNAME>)[123] + */ + const char *field_name; + /* extra indentation level (in number of tabs) to emit for multi-line + * type declarations (e.g., anonymous struct); applies for lines + * starting from the second one (first line is assumed to have + * necessary indentation already + */ + int indent_level; +}; +#define btf_dump_emit_type_decl_opts__last_field indent_level + +LIBBPF_API int +btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, + const struct btf_dump_emit_type_decl_opts *opts); + /* * A set of helpers for easier BTF types handling */ diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index cb126d8fcf75..bd09ed1710f1 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -18,6 +18,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -116,6 +119,8 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) va_end(args); } +static int btf_dump_mark_referenced(struct btf_dump *d); + struct btf_dump *btf_dump__new(const struct btf *btf, const struct btf_ext *btf_ext, const struct btf_dump_opts *opts, @@ -137,18 +142,40 @@ struct btf_dump *btf_dump__new(const struct btf *btf, if (IS_ERR(d->type_names)) { err = PTR_ERR(d->type_names); d->type_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); + goto err; } d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); if (IS_ERR(d->ident_names)) { err = PTR_ERR(d->ident_names); d->ident_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); + goto err; + } + d->type_states = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->type_states[0])); + if (!d->type_states) { + err = -ENOMEM; + goto err; + } + d->cached_names = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->cached_names[0])); + if (!d->cached_names) { + err = -ENOMEM; + goto err; } + /* VOID is special */ + d->type_states[0].order_state = ORDERED; + d->type_states[0].emit_state = EMITTED; + + /* eagerly determine referenced types for anon enums */ + err = btf_dump_mark_referenced(d); + if (err) + goto err; + return d; +err: + btf_dump__free(d); + return ERR_PTR(err); } void btf_dump__free(struct btf_dump *d) @@ -175,7 +202,6 @@ void btf_dump__free(struct btf_dump *d) free(d); } -static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); @@ -202,27 +228,6 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) if (id > btf__get_nr_types(d->btf)) return -EINVAL; - /* type states are lazily allocated, as they might not be needed */ - if (!d->type_states) { - d->type_states = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->type_states[0])); - if (!d->type_states) - return -ENOMEM; - d->cached_names = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->cached_names[0])); - if (!d->cached_names) - return -ENOMEM; - - /* VOID is special */ - d->type_states[0].order_state = ORDERED; - d->type_states[0].emit_state = EMITTED; - - /* eagerly determine referenced types for anon enums */ - err = btf_dump_mark_referenced(d); - if (err) - return err; - } - d->emit_queue_cnt = 0; err = btf_dump_order_type(d, id, false); if (err < 0) @@ -752,41 +757,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) } } -static int btf_align_of(const struct btf *btf, __u32 id) -{ - const struct btf_type *t = btf__type_by_id(btf, id); - __u16 kind = btf_kind(t); - - switch (kind) { - case BTF_KIND_INT: - case BTF_KIND_ENUM: - return min(sizeof(void *), t->size); - case BTF_KIND_PTR: - return sizeof(void *); - case BTF_KIND_TYPEDEF: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - return btf_align_of(btf, t->type); - case BTF_KIND_ARRAY: - return btf_align_of(btf, btf_array(t)->type); - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); - int i, align = 1; - - for (i = 0; i < vlen; i++, m++) - align = max(align, btf_align_of(btf, m->type)); - - return align; - } - default: - pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); - return 1; - } -} - static bool btf_is_struct_packed(const struct btf *btf, __u32 id, const struct btf_type *t) { @@ -794,18 +764,18 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, int align, i, bit_sz; __u16 vlen; - align = btf_align_of(btf, id); + align = btf__align_of(btf, id); /* size of a non-packed struct has to be a multiple of its alignment*/ - if (t->size % align) + if (align && t->size % align) return true; m = btf_members(t); vlen = btf_vlen(t); /* all non-bitfield fields have to be naturally aligned */ for (i = 0; i < vlen; i++, m++) { - align = btf_align_of(btf, m->type); + align = btf__align_of(btf, m->type); bit_sz = btf_member_bitfield_size(t, i); - if (bit_sz == 0 && m->offset % (8 * align) != 0) + if (align && bit_sz == 0 && m->offset % (8 * align) != 0) return true; } @@ -889,7 +859,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, fname = btf_name_of(d, m->name_off); m_sz = btf_member_bitfield_size(t, i); m_off = btf_member_bit_offset(t, i); - align = packed ? 1 : btf_align_of(d->btf, m->type); + align = packed ? 1 : btf__align_of(d->btf, m->type); btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); btf_dump_printf(d, "\n%s", pfx(lvl + 1)); @@ -907,7 +877,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, /* pad at the end, if necessary */ if (is_struct) { - align = packed ? 1 : btf_align_of(d->btf, id); + align = packed ? 1 : btf__align_of(d->btf, id); btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, lvl + 1); } @@ -1051,6 +1021,21 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) * of a stack frame. Some care is required to "pop" stack frames after * processing type declaration chain. */ +int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, + const struct btf_dump_emit_type_decl_opts *opts) +{ + const char *fname; + int lvl; + + if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) + return -EINVAL; + + fname = OPTS_GET(opts, field_name, NULL); + lvl = OPTS_GET(opts, indent_level, 0); + btf_dump_emit_type_decl(d, id, fname, lvl); + return 0; +} + static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, const char *fname, int lvl) { diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 6122272943e6..54c30c802070 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -12,6 +12,9 @@ #include <linux/err.h> #include "hashmap.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3f09772192f1..7469c7dcc15e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -18,11 +18,13 @@ #include <stdarg.h> #include <libgen.h> #include <inttypes.h> +#include <limits.h> #include <string.h> #include <unistd.h> #include <endian.h> #include <fcntl.h> #include <errno.h> +#include <ctype.h> #include <asm/unistd.h> #include <linux/err.h> #include <linux/kernel.h> @@ -41,9 +43,11 @@ #include <sys/types.h> #include <sys/vfs.h> #include <sys/utsname.h> +#include <sys/resource.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> +#include <zlib.h> #include "libbpf.h" #include "bpf.h" @@ -52,6 +56,9 @@ #include "libbpf_internal.h" #include "hashmap.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef EM_BPF #define EM_BPF 247 #endif @@ -67,6 +74,12 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); +static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj, + int idx); +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); + static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { @@ -99,14 +112,33 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) va_end(args); } -#define STRERR_BUFSIZE 128 +static void pr_perm_msg(int err) +{ + struct rlimit limit; + char buf[100]; + + if (err != -EPERM || geteuid() != 0) + return; + + err = getrlimit(RLIMIT_MEMLOCK, &limit); + if (err) + return; + + if (limit.rlim_cur == RLIM_INFINITY) + return; -#define CHECK_ERR(action, err, out) do { \ - err = action; \ - if (err) \ - goto out; \ -} while (0) + if (limit.rlim_cur < 1024) + snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); + else if (limit.rlim_cur < 1024*1024) + snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); + else + snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); + + pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", + buf); +} +#define STRERR_BUFSIZE 128 /* Copied from tools/perf/util/util.h */ #ifndef zfree @@ -144,6 +176,22 @@ struct bpf_capabilities { __u32 btf_datasec:1; /* BPF_F_MMAPABLE is supported for arrays */ __u32 array_mmap:1; + /* BTF_FUNC_GLOBAL is supported */ + __u32 btf_func_global:1; +}; + +enum reloc_type { + RELO_LD64, + RELO_CALL, + RELO_DATA, + RELO_EXTERN, +}; + +struct reloc_desc { + enum reloc_type type; + int insn_idx; + int map_idx; + int sym_off; }; /* @@ -164,16 +212,7 @@ struct bpf_program { size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct reloc_desc { - enum { - RELO_LD64, - RELO_CALL, - RELO_DATA, - } type; - int insn_idx; - int map_idx; - int sym_off; - } *reloc_desc; + struct reloc_desc *reloc_desc; int nr_reloc; int log_level; @@ -202,22 +241,51 @@ struct bpf_program { __u32 prog_flags; }; +struct bpf_struct_ops { + const char *tname; + const struct btf_type *type; + struct bpf_program **progs; + __u32 *kern_func_off; + /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ + void *data; + /* e.g. struct bpf_struct_ops_tcp_congestion_ops in + * btf_vmlinux's format. + * struct bpf_struct_ops_tcp_congestion_ops { + * [... some other kernel fields ...] + * struct tcp_congestion_ops data; + * } + * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) + * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" + * from "data". + */ + void *kern_vdata; + __u32 type_id; +}; + +#define DATA_SEC ".data" +#define BSS_SEC ".bss" +#define RODATA_SEC ".rodata" +#define KCONFIG_SEC ".kconfig" +#define STRUCT_OPS_SEC ".struct_ops" + enum libbpf_map_type { LIBBPF_MAP_UNSPEC, LIBBPF_MAP_DATA, LIBBPF_MAP_BSS, LIBBPF_MAP_RODATA, + LIBBPF_MAP_KCONFIG, }; static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = ".data", - [LIBBPF_MAP_BSS] = ".bss", - [LIBBPF_MAP_RODATA] = ".rodata", + [LIBBPF_MAP_DATA] = DATA_SEC, + [LIBBPF_MAP_BSS] = BSS_SEC, + [LIBBPF_MAP_RODATA] = RODATA_SEC, + [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, }; struct bpf_map { - int fd; char *name; + int fd; int sec_idx; size_t sec_offset; int map_ifindex; @@ -225,17 +293,37 @@ struct bpf_map { struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 btf_vmlinux_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; + void *mmaped; + struct bpf_struct_ops *st_ops; char *pin_path; bool pinned; bool reused; }; -struct bpf_secdata { - void *rodata; - void *data; +enum extern_type { + EXT_UNKNOWN, + EXT_CHAR, + EXT_BOOL, + EXT_INT, + EXT_TRISTATE, + EXT_CHAR_ARR, +}; + +struct extern_desc { + const char *name; + int sym_idx; + int btf_id; + enum extern_type type; + int sz; + int align; + int data_off; + bool is_signed; + bool is_weak; + bool is_set; }; static LIST_HEAD(bpf_objects_list); @@ -250,11 +338,14 @@ struct bpf_object { struct bpf_map *maps; size_t nr_maps; size_t maps_cap; - struct bpf_secdata sections; + + char *kconfig; + struct extern_desc *externs; + int nr_extern; + int kconfig_map_idx; bool loaded; bool has_pseudo_calls; - bool relaxed_core_relocs; /* * Information when doing elf related work. Only valid if fd @@ -270,6 +361,7 @@ struct bpf_object { Elf_Data *data; Elf_Data *rodata; Elf_Data *bss; + Elf_Data *st_ops_data; size_t strtabidx; struct { GElf_Shdr shdr; @@ -279,9 +371,11 @@ struct bpf_object { int maps_shndx; int btf_maps_shndx; int text_shndx; + int symbols_shndx; int data_shndx; int rodata_shndx; int bss_shndx; + int st_ops_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -291,6 +385,10 @@ struct bpf_object { struct list_head list; struct btf *btf; + /* Parse and load BTF vmlinux if any of the programs in the object need + * it at load time. + */ + struct btf *btf_vmlinux; struct btf_ext *btf_ext; void *priv; @@ -509,6 +607,348 @@ static __u32 get_kernel_version(void) return KERNEL_VERSION(major, minor, patch); } +static const struct btf_member * +find_member_by_offset(const struct btf_type *t, __u32 bit_offset) +{ + struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + if (btf_member_bit_offset(t, i) == bit_offset) + return m; + } + + return NULL; +} + +static const struct btf_member * +find_member_by_name(const struct btf *btf, const struct btf_type *t, + const char *name) +{ + struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) + return m; + } + + return NULL; +} + +#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" +static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, + const char *name, __u32 kind); + +static int +find_struct_ops_kern_types(const struct btf *btf, const char *tname, + const struct btf_type **type, __u32 *type_id, + const struct btf_type **vtype, __u32 *vtype_id, + const struct btf_member **data_member) +{ + const struct btf_type *kern_type, *kern_vtype; + const struct btf_member *kern_data_member; + __s32 kern_vtype_id, kern_type_id; + __u32 i; + + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (kern_type_id < 0) { + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", + tname); + return kern_type_id; + } + kern_type = btf__type_by_id(btf, kern_type_id); + + /* Find the corresponding "map_value" type that will be used + * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, + * find "struct bpf_struct_ops_tcp_congestion_ops" from the + * btf_vmlinux. + */ + kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, + tname, BTF_KIND_STRUCT); + if (kern_vtype_id < 0) { + pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", + STRUCT_OPS_VALUE_PREFIX, tname); + return kern_vtype_id; + } + kern_vtype = btf__type_by_id(btf, kern_vtype_id); + + /* Find "struct tcp_congestion_ops" from + * struct bpf_struct_ops_tcp_congestion_ops { + * [ ... ] + * struct tcp_congestion_ops data; + * } + */ + kern_data_member = btf_members(kern_vtype); + for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { + if (kern_data_member->type == kern_type_id) + break; + } + if (i == btf_vlen(kern_vtype)) { + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", + tname, STRUCT_OPS_VALUE_PREFIX, tname); + return -EINVAL; + } + + *type = kern_type; + *type_id = kern_type_id; + *vtype = kern_vtype; + *vtype_id = kern_vtype_id; + *data_member = kern_data_member; + + return 0; +} + +static bool bpf_map__is_struct_ops(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; +} + +/* Init the map's fields that depend on kern_btf */ +static int bpf_map__init_kern_struct_ops(struct bpf_map *map, + const struct btf *btf, + const struct btf *kern_btf) +{ + const struct btf_member *member, *kern_member, *kern_data_member; + const struct btf_type *type, *kern_type, *kern_vtype; + __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_struct_ops *st_ops; + void *data, *kern_data; + const char *tname; + int err; + + st_ops = map->st_ops; + type = st_ops->type; + tname = st_ops->tname; + err = find_struct_ops_kern_types(kern_btf, tname, + &kern_type, &kern_type_id, + &kern_vtype, &kern_vtype_id, + &kern_data_member); + if (err) + return err; + + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", + map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + + map->def.value_size = kern_vtype->size; + map->btf_vmlinux_value_type_id = kern_vtype_id; + + st_ops->kern_vdata = calloc(1, kern_vtype->size); + if (!st_ops->kern_vdata) + return -ENOMEM; + + data = st_ops->data; + kern_data_off = kern_data_member->offset / 8; + kern_data = st_ops->kern_vdata + kern_data_off; + + member = btf_members(type); + for (i = 0; i < btf_vlen(type); i++, member++) { + const struct btf_type *mtype, *kern_mtype; + __u32 mtype_id, kern_mtype_id; + void *mdata, *kern_mdata; + __s64 msize, kern_msize; + __u32 moff, kern_moff; + __u32 kern_member_idx; + const char *mname; + + mname = btf__name_by_offset(btf, member->name_off); + kern_member = find_member_by_name(kern_btf, kern_type, mname); + if (!kern_member) { + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + map->name, mname); + return -ENOTSUP; + } + + kern_member_idx = kern_member - btf_members(kern_type); + if (btf_member_bitfield_size(type, i) || + btf_member_bitfield_size(kern_type, kern_member_idx)) { + pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", + map->name, mname); + return -ENOTSUP; + } + + moff = member->offset / 8; + kern_moff = kern_member->offset / 8; + + mdata = data + moff; + kern_mdata = kern_data + kern_moff; + + mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, + &kern_mtype_id); + if (BTF_INFO_KIND(mtype->info) != + BTF_INFO_KIND(kern_mtype->info)) { + pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", + map->name, mname, BTF_INFO_KIND(mtype->info), + BTF_INFO_KIND(kern_mtype->info)); + return -ENOTSUP; + } + + if (btf_is_ptr(mtype)) { + struct bpf_program *prog; + + mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + kern_mtype = skip_mods_and_typedefs(kern_btf, + kern_mtype->type, + &kern_mtype_id); + if (!btf_is_func_proto(mtype) || + !btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + map->name, mname); + return -ENOTSUP; + } + + prog = st_ops->progs[i]; + if (!prog) { + pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", + map->name, mname); + continue; + } + + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + + st_ops->kern_func_off[i] = kern_data_off + kern_moff; + + pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", + map->name, mname, prog->name, moff, + kern_moff); + + continue; + } + + msize = btf__resolve_size(btf, mtype_id); + kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); + if (msize < 0 || kern_msize < 0 || msize != kern_msize) { + pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", + map->name, mname, (ssize_t)msize, + (ssize_t)kern_msize); + return -ENOTSUP; + } + + pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", + map->name, mname, (unsigned int)msize, + moff, kern_moff); + memcpy(kern_mdata, mdata, msize); + } + + return 0; +} + +static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) +{ + struct bpf_map *map; + size_t i; + int err; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!bpf_map__is_struct_ops(map)) + continue; + + err = bpf_map__init_kern_struct_ops(map, obj->btf, + obj->btf_vmlinux); + if (err) + return err; + } + + return 0; +} + +static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) +{ + const struct btf_type *type, *datasec; + const struct btf_var_secinfo *vsi; + struct bpf_struct_ops *st_ops; + const char *tname, *var_name; + __s32 type_id, datasec_id; + const struct btf *btf; + struct bpf_map *map; + __u32 i; + + if (obj->efile.st_ops_shndx == -1) + return 0; + + btf = obj->btf; + datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, + BTF_KIND_DATASEC); + if (datasec_id < 0) { + pr_warn("struct_ops init: DATASEC %s not found\n", + STRUCT_OPS_SEC); + return -EINVAL; + } + + datasec = btf__type_by_id(btf, datasec_id); + vsi = btf_var_secinfos(datasec); + for (i = 0; i < btf_vlen(datasec); i++, vsi++) { + type = btf__type_by_id(obj->btf, vsi->type); + var_name = btf__name_by_offset(obj->btf, type->name_off); + + type_id = btf__resolve_type(obj->btf, vsi->type); + if (type_id < 0) { + pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", + vsi->type, STRUCT_OPS_SEC); + return -EINVAL; + } + + type = btf__type_by_id(obj->btf, type_id); + tname = btf__name_by_offset(obj->btf, type->name_off); + if (!tname[0]) { + pr_warn("struct_ops init: anonymous type is not supported\n"); + return -ENOTSUP; + } + if (!btf_is_struct(type)) { + pr_warn("struct_ops init: %s is not a struct\n", tname); + return -EINVAL; + } + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + + map->sec_idx = obj->efile.st_ops_shndx; + map->sec_offset = vsi->offset; + map->name = strdup(var_name); + if (!map->name) + return -ENOMEM; + + map->def.type = BPF_MAP_TYPE_STRUCT_OPS; + map->def.key_size = sizeof(int); + map->def.value_size = type->size; + map->def.max_entries = 1; + + map->st_ops = calloc(1, sizeof(*map->st_ops)); + if (!map->st_ops) + return -ENOMEM; + st_ops = map->st_ops; + st_ops->data = malloc(type->size); + st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); + st_ops->kern_func_off = malloc(btf_vlen(type) * + sizeof(*st_ops->kern_func_off)); + if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) + return -ENOMEM; + + if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", + var_name, STRUCT_OPS_SEC); + return -EINVAL; + } + + memcpy(st_ops->data, + obj->efile.st_ops_data->d_buf + vsi->offset, + type->size); + st_ops->tname = tname; + st_ops->type = type; + st_ops->type_id = type_id; + + pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", + tname, type_id, var_name, vsi->offset); + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, @@ -550,6 +990,8 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.data_shndx = -1; obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->efile.st_ops_shndx = -1; + obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -572,6 +1014,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.data = NULL; obj->efile.rodata = NULL; obj->efile.bss = NULL; + obj->efile.st_ops_data = NULL; zfree(&obj->efile.reloc_sects); obj->efile.nr_reloc_sects = 0; @@ -677,16 +1120,6 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) return 0; } -static int compare_bpf_map(const void *_a, const void *_b) -{ - const struct bpf_map *a = _a; - const struct bpf_map *b = _b; - - if (a->sec_idx != b->sec_idx) - return a->sec_idx - b->sec_idx; - return a->sec_offset - b->sec_offset; -} - static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) { if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -748,15 +1181,18 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, *size = 0; if (!name) { return -EINVAL; - } else if (!strcmp(name, ".data")) { + } else if (!strcmp(name, DATA_SEC)) { if (obj->efile.data) *size = obj->efile.data->d_size; - } else if (!strcmp(name, ".bss")) { + } else if (!strcmp(name, BSS_SEC)) { if (obj->efile.bss) *size = obj->efile.bss->d_size; - } else if (!strcmp(name, ".rodata")) { + } else if (!strcmp(name, RODATA_SEC)) { if (obj->efile.rodata) *size = obj->efile.rodata->d_size; + } else if (!strcmp(name, STRUCT_OPS_SEC)) { + if (obj->efile.st_ops_data) + *size = obj->efile.st_ops_data->d_size; } else { ret = bpf_object_search_section_size(obj, name, &d_size); if (!ret) @@ -835,13 +1271,43 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return &obj->maps[obj->nr_maps++]; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + long page_sz = sysconf(_SC_PAGE_SIZE); + size_t map_sz; + + map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = roundup(map_sz, page_sz); + return map_sz; +} + +static char *internal_map_name(struct bpf_object *obj, + enum libbpf_map_type type) +{ + char map_name[BPF_OBJ_NAME_LEN], *p; + const char *sfx = libbpf_type_to_btf_name[type]; + int sfx_len = max((size_t)7, strlen(sfx)); + int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, + strlen(obj->name)); + + snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, + sfx_len, libbpf_type_to_btf_name[type]); + + /* sanitise map name to characters allowed by kernel */ + for (p = map_name; *p && p < map_name + sizeof(map_name); p++) + if (!isalnum(*p) && *p != '_' && *p != '.') + *p = '_'; + + return strdup(map_name); +} + static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, Elf_Data *data, void **data_buff) + int sec_idx, void *data, size_t data_sz) { - char map_name[BPF_OBJ_NAME_LEN]; struct bpf_map_def *def; struct bpf_map *map; + int err; map = bpf_object__add_map(obj); if (IS_ERR(map)) @@ -850,9 +1316,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, - libbpf_type_to_btf_name[type]); - map->name = strdup(map_name); + map->name = internal_map_name(obj, type); if (!map->name) { pr_warn("failed to alloc map name\n"); return -ENOMEM; @@ -861,25 +1325,29 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def = &map->def; def->type = BPF_MAP_TYPE_ARRAY; def->key_size = sizeof(int); - def->value_size = data->d_size; + def->value_size = data_sz; def->max_entries = 1; - def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; - if (obj->caps.array_mmap) - def->map_flags |= BPF_F_MMAPABLE; + def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG + ? BPF_F_RDONLY_PROG : 0; + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", - map_name, map->sec_idx, map->sec_offset, def->map_flags); + map->name, map->sec_idx, map->sec_offset, def->map_flags); - if (data_buff) { - *data_buff = malloc(data->d_size); - if (!*data_buff) { - zfree(&map->name); - pr_warn("failed to alloc map content buffer\n"); - return -ENOMEM; - } - memcpy(*data_buff, data->d_buf, data->d_size); + map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("failed to alloc map '%s' content buffer: %d\n", + map->name, err); + zfree(&map->name); + return err; } + if (data) + memcpy(map->mmaped, data, data_sz); + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); return 0; } @@ -888,37 +1356,332 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) { int err; - if (!obj->caps.global_data) - return 0; /* * Populate obj->maps with libbpf internal maps. */ if (obj->efile.data_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, obj->efile.data_shndx, - obj->efile.data, - &obj->sections.data); + obj->efile.data->d_buf, + obj->efile.data->d_size); if (err) return err; } if (obj->efile.rodata_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, obj->efile.rodata_shndx, - obj->efile.rodata, - &obj->sections.rodata); + obj->efile.rodata->d_buf, + obj->efile.rodata->d_size); if (err) return err; } if (obj->efile.bss_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, obj->efile.bss_shndx, - obj->efile.bss, NULL); + NULL, + obj->efile.bss->d_size); if (err) return err; } return 0; } + +static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, + const void *name) +{ + int i; + + for (i = 0; i < obj->nr_extern; i++) { + if (strcmp(obj->externs[i].name, name) == 0) + return &obj->externs[i]; + } + return NULL; +} + +static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, + char value) +{ + switch (ext->type) { + case EXT_BOOL: + if (value == 'm') { + pr_warn("extern %s=%c should be tristate or char\n", + ext->name, value); + return -EINVAL; + } + *(bool *)ext_val = value == 'y' ? true : false; + break; + case EXT_TRISTATE: + if (value == 'y') + *(enum libbpf_tristate *)ext_val = TRI_YES; + else if (value == 'm') + *(enum libbpf_tristate *)ext_val = TRI_MODULE; + else /* value == 'n' */ + *(enum libbpf_tristate *)ext_val = TRI_NO; + break; + case EXT_CHAR: + *(char *)ext_val = value; + break; + case EXT_UNKNOWN: + case EXT_INT: + case EXT_CHAR_ARR: + default: + pr_warn("extern %s=%c should be bool, tristate, or char\n", + ext->name, value); + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int set_ext_value_str(struct extern_desc *ext, char *ext_val, + const char *value) +{ + size_t len; + + if (ext->type != EXT_CHAR_ARR) { + pr_warn("extern %s=%s should char array\n", ext->name, value); + return -EINVAL; + } + + len = strlen(value); + if (value[len - 1] != '"') { + pr_warn("extern '%s': invalid string config '%s'\n", + ext->name, value); + return -EINVAL; + } + + /* strip quotes */ + len -= 2; + if (len >= ext->sz) { + pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->sz - 1); + len = ext->sz - 1; + } + memcpy(ext_val, value + 1, len); + ext_val[len] = '\0'; + ext->is_set = true; + return 0; +} + +static int parse_u64(const char *value, __u64 *res) +{ + char *value_end; + int err; + + errno = 0; + *res = strtoull(value, &value_end, 0); + if (errno) { + err = -errno; + pr_warn("failed to parse '%s' as integer: %d\n", value, err); + return err; + } + if (*value_end) { + pr_warn("failed to parse '%s' as integer completely\n", value); + return -EINVAL; + } + return 0; +} + +static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +{ + int bit_sz = ext->sz * 8; + + if (ext->sz == 8) + return true; + + /* Validate that value stored in u64 fits in integer of `ext->sz` + * bytes size without any loss of information. If the target integer + * is signed, we rely on the following limits of integer type of + * Y bits and subsequent transformation: + * + * -2^(Y-1) <= X <= 2^(Y-1) - 1 + * 0 <= X + 2^(Y-1) <= 2^Y - 1 + * 0 <= X + 2^(Y-1) < 2^Y + * + * For unsigned target integer, check that all the (64 - Y) bits are + * zero. + */ + if (ext->is_signed) + return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); + else + return (v >> bit_sz) == 0; +} + +static int set_ext_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) +{ + if (ext->type != EXT_INT && ext->type != EXT_CHAR) { + pr_warn("extern %s=%llu should be integer\n", + ext->name, (unsigned long long)value); + return -EINVAL; + } + if (!is_ext_value_in_range(ext, value)) { + pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->sz); + return -ERANGE; + } + switch (ext->sz) { + case 1: *(__u8 *)ext_val = value; break; + case 2: *(__u16 *)ext_val = value; break; + case 4: *(__u32 *)ext_val = value; break; + case 8: *(__u64 *)ext_val = value; break; + default: + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int bpf_object__process_kconfig_line(struct bpf_object *obj, + char *buf, void *data) +{ + struct extern_desc *ext; + char *sep, *value; + int len, err = 0; + void *ext_val; + __u64 num; + + if (strncmp(buf, "CONFIG_", 7)) + return 0; + + sep = strchr(buf, '='); + if (!sep) { + pr_warn("failed to parse '%s': no separator\n", buf); + return -EINVAL; + } + + /* Trim ending '\n' */ + len = strlen(buf); + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + /* Split on '=' and ensure that a value is present. */ + *sep = '\0'; + if (!sep[1]) { + *sep = '='; + pr_warn("failed to parse '%s': no value\n", buf); + return -EINVAL; + } + + ext = find_extern_by_name(obj, buf); + if (!ext || ext->is_set) + return 0; + + ext_val = data + ext->data_off; + value = sep + 1; + + switch (*value) { + case 'y': case 'n': case 'm': + err = set_ext_value_tri(ext, ext_val, *value); + break; + case '"': + err = set_ext_value_str(ext, ext_val, value); + break; + default: + /* assume integer */ + err = parse_u64(value, &num); + if (err) { + pr_warn("extern %s=%s should be integer\n", + ext->name, value); + return err; + } + err = set_ext_value_num(ext, ext_val, num); + break; + } + if (err) + return err; + pr_debug("extern %s=%s\n", ext->name, value); + return 0; +} + +static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) +{ + char buf[PATH_MAX]; + struct utsname uts; + int len, err = 0; + gzFile file; + + uname(&uts); + len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + /* gzopen also accepts uncompressed files. */ + file = gzopen(buf, "r"); + if (!file) + file = gzopen("/proc/config.gz", "r"); + + if (!file) { + pr_warn("failed to open system Kconfig\n"); + return -ENOENT; + } + + while (gzgets(file, buf, sizeof(buf))) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing system Kconfig line '%s': %d\n", + buf, err); + goto out; + } + } + +out: + gzclose(file); + return err; +} + +static int bpf_object__read_kconfig_mem(struct bpf_object *obj, + const char *config, void *data) +{ + char buf[PATH_MAX]; + int err = 0; + FILE *file; + + file = fmemopen((void *)config, strlen(config), "r"); + if (!file) { + err = -errno; + pr_warn("failed to open in-memory Kconfig: %d\n", err); + return err; + } + + while (fgets(buf, sizeof(buf), file)) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing in-memory Kconfig line '%s': %d\n", + buf, err); + break; + } + } + + fclose(file); + return err; +} + +static int bpf_object__init_kconfig_map(struct bpf_object *obj) +{ + struct extern_desc *last_ext; + size_t map_sz; + int err; + + if (obj->nr_extern == 0) + return 0; + + last_ext = &obj->externs[obj->nr_extern - 1]; + map_sz = last_ext->data_off + last_ext->sz; + + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, + obj->efile.symbols_shndx, + NULL, map_sz); + if (err) + return err; + + obj->kconfig_map_idx = obj->nr_maps - 1; + + return 0; +} + static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) { Elf_Data *symbols = obj->efile.symbols; @@ -1060,6 +1823,20 @@ skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) return t; } +static const struct btf_type * +resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t; + + t = skip_mods_and_typedefs(btf, id, NULL); + if (!btf_is_ptr(t)) + return NULL; + + t = skip_mods_and_typedefs(btf, t->type, res_id); + + return btf_is_func_proto(t) ? t : NULL; +} + /* * Fetch integer attribute of BTF map definition. Such attributes are * represented using a pointer to an array, in which dimensionality of array @@ -1242,15 +2019,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found key [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found key [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %zd.\n", + map_name, map->def.key_size, (ssize_t)sz); return -EINVAL; } map->def.key_size = sz; @@ -1285,15 +2062,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found value [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found value [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %zd.\n", + map_name, map->def.value_size, (ssize_t)sz); return -EINVAL; } map->def.value_size = sz; @@ -1393,28 +2170,24 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__init_maps(struct bpf_object *obj, + const struct bpf_object_open_opts *opts) { - bool strict = !relaxed_maps; + const char *pin_root_path; + bool strict; int err; - err = bpf_object__init_user_maps(obj, strict); - if (err) - return err; - - err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); - if (err) - return err; + strict = !OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_global_data_maps(obj); + err = bpf_object__init_user_maps(obj, strict); + err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = err ?: bpf_object__init_global_data_maps(obj); + err = err ?: bpf_object__init_kconfig_map(obj); + err = err ?: bpf_object__init_struct_ops_maps(obj); if (err) return err; - if (obj->nr_maps) { - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), - compare_bpf_map); - } return 0; } @@ -1438,13 +2211,14 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static void bpf_object__sanitize_btf(struct bpf_object *obj) { + bool has_func_global = obj->caps.btf_func_global; bool has_datasec = obj->caps.btf_datasec; bool has_func = obj->caps.btf_func; struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - if (!obj->btf || (has_func && has_datasec)) + if (!obj->btf || (has_func && has_datasec && has_func_global)) return; for (i = 1; i <= btf__get_nr_types(btf); i++) { @@ -1492,6 +2266,9 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); + } else if (!has_func_global && btf_is_func(t)) { + /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ + t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); } } } @@ -1509,28 +2286,27 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) { - return obj->efile.btf_maps_shndx >= 0; + return obj->efile.btf_maps_shndx >= 0 || + obj->efile.st_ops_shndx >= 0 || + obj->nr_extern > 0; } static int bpf_object__init_btf(struct bpf_object *obj, Elf_Data *btf_data, Elf_Data *btf_ext_data) { - bool btf_required = bpf_object__is_btf_mandatory(obj); - int err = 0; + int err = -ENOENT; if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); if (IS_ERR(obj->btf)) { + err = PTR_ERR(obj->btf); + obj->btf = NULL; pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); goto out; } - err = btf__finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - goto out; - } + err = 0; } if (btf_ext_data) { if (!obj->btf) { @@ -1548,22 +2324,72 @@ static int bpf_object__init_btf(struct bpf_object *obj, } } out: - if (err || IS_ERR(obj->btf)) { - if (btf_required) - err = err ? : PTR_ERR(obj->btf); - else - err = 0; - if (!IS_ERR_OR_NULL(obj->btf)) - btf__free(obj->btf); - obj->btf = NULL; + if (err && bpf_object__is_btf_mandatory(obj)) { + pr_warn("BTF is required, but is missing or corrupted.\n"); + return err; } - if (btf_required && !obj->btf) { + return 0; +} + +static int bpf_object__finalize_btf(struct bpf_object *obj) +{ + int err; + + if (!obj->btf) + return 0; + + err = btf__finalize_data(obj, obj->btf); + if (!err) + return 0; + + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + + if (bpf_object__is_btf_mandatory(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); - return err == 0 ? -ENOENT : err; + return -ENOENT; } return 0; } +static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) +{ + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) + return true; + + /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs + * also need vmlinux BTF + */ + if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) + return true; + + return false; +} + +static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) +{ + struct bpf_program *prog; + int err; + + bpf_object__for_each_program(prog, obj) { + if (libbpf_prog_needs_vmlinux_btf(prog)) { + obj->btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(obj->btf_vmlinux)) { + err = PTR_ERR(obj->btf_vmlinux); + pr_warn("Error loading vmlinux BTF: %d\n", err); + obj->btf_vmlinux = NULL; + return err; + } + return 0; + } + } + + return 0; +} + static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { int err = 0; @@ -1592,8 +2418,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1665,6 +2490,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; + obj->efile.symbols_shndx = idx; obj->efile.strtabidx = sh.sh_link; } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { if (sh.sh_flags & SHF_EXECINSTR) { @@ -1683,12 +2509,15 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, name, obj->path, cp); return err; } - } else if (strcmp(name, ".data") == 0) { + } else if (strcmp(name, DATA_SEC) == 0) { obj->efile.data = data; obj->efile.data_shndx = idx; - } else if (strcmp(name, ".rodata") == 0) { + } else if (strcmp(name, RODATA_SEC) == 0) { obj->efile.rodata = data; obj->efile.rodata_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { + obj->efile.st_ops_data = data; + obj->efile.st_ops_shndx = idx; } else { pr_debug("skip section(%d) %s\n", idx, name); } @@ -1698,7 +2527,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, int sec = sh.sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec)) { + if (!section_have_execinstr(obj, sec) && + strcmp(name, ".rel" STRUCT_OPS_SEC)) { pr_debug("skip relo %s(%d) for section(%d)\n", name, idx, sec); continue; @@ -1716,7 +2546,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + } else if (sh.sh_type == SHT_NOBITS && + strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { @@ -1728,14 +2559,217 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, pr_warn("Corrupted ELF file: index of strtab invalid\n"); return -LIBBPF_ERRNO__FORMAT; } - err = bpf_object__init_btf(obj, btf_data, btf_ext_data); - if (!err) - err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); - if (!err) - err = bpf_object__sanitize_and_load_btf(obj); - if (!err) - err = bpf_object__init_prog_names(obj); - return err; + return bpf_object__init_btf(obj, btf_data, btf_ext_data); +} + +static bool sym_is_extern(const GElf_Sym *sym) +{ + int bind = GELF_ST_BIND(sym->st_info); + /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ + return sym->st_shndx == SHN_UNDEF && + (bind == STB_GLOBAL || bind == STB_WEAK) && + GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; +} + +static int find_extern_btf_id(const struct btf *btf, const char *ext_name) +{ + const struct btf_type *t; + const char *var_name; + int i, n; + + if (!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_var(t)) + continue; + + var_name = btf__name_by_offset(btf, t->name_off); + if (strcmp(var_name, ext_name)) + continue; + + if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) + return -EINVAL; + + return i; + } + + return -ENOENT; +} + +static enum extern_type find_extern_type(const struct btf *btf, int id, + bool *is_signed) +{ + const struct btf_type *t; + const char *name; + + t = skip_mods_and_typedefs(btf, id, NULL); + name = btf__name_by_offset(btf, t->name_off); + + if (is_signed) + *is_signed = false; + switch (btf_kind(t)) { + case BTF_KIND_INT: { + int enc = btf_int_encoding(t); + + if (enc & BTF_INT_BOOL) + return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + if (is_signed) + *is_signed = enc & BTF_INT_SIGNED; + if (t->size == 1) + return EXT_CHAR; + if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) + return EXT_UNKNOWN; + return EXT_INT; + } + case BTF_KIND_ENUM: + if (t->size != 4) + return EXT_UNKNOWN; + if (strcmp(name, "libbpf_tristate")) + return EXT_UNKNOWN; + return EXT_TRISTATE; + case BTF_KIND_ARRAY: + if (btf_array(t)->nelems == 0) + return EXT_UNKNOWN; + if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) + return EXT_UNKNOWN; + return EXT_CHAR_ARR; + default: + return EXT_UNKNOWN; + } +} + +static int cmp_externs(const void *_a, const void *_b) +{ + const struct extern_desc *a = _a; + const struct extern_desc *b = _b; + + /* descending order by alignment requirements */ + if (a->align != b->align) + return a->align > b->align ? -1 : 1; + /* ascending order by size, within same alignment class */ + if (a->sz != b->sz) + return a->sz < b->sz ? -1 : 1; + /* resolve ties by name */ + return strcmp(a->name, b->name); +} + +static int bpf_object__collect_externs(struct bpf_object *obj) +{ + const struct btf_type *t; + struct extern_desc *ext; + int i, n, off, btf_id; + struct btf_type *sec; + const char *ext_name; + Elf_Scn *scn; + GElf_Shdr sh; + + if (!obj->efile.symbols) + return 0; + + scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); + if (!scn) + return -LIBBPF_ERRNO__FORMAT; + if (gelf_getshdr(scn, &sh) != &sh) + return -LIBBPF_ERRNO__FORMAT; + n = sh.sh_size / sh.sh_entsize; + + pr_debug("looking for externs among %d symbols...\n", n); + for (i = 0; i < n; i++) { + GElf_Sym sym; + + if (!gelf_getsym(obj->efile.symbols, i, &sym)) + return -LIBBPF_ERRNO__FORMAT; + if (!sym_is_extern(&sym)) + continue; + ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!ext_name || !ext_name[0]) + continue; + + ext = obj->externs; + ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); + if (!ext) + return -ENOMEM; + obj->externs = ext; + ext = &ext[obj->nr_extern]; + memset(ext, 0, sizeof(*ext)); + obj->nr_extern++; + + ext->btf_id = find_extern_btf_id(obj->btf, ext_name); + if (ext->btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s': %d\n", + ext_name, ext->btf_id); + return ext->btf_id; + } + t = btf__type_by_id(obj->btf, ext->btf_id); + ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->sym_idx = i; + ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->sz = btf__resolve_size(obj->btf, t->type); + if (ext->sz <= 0) { + pr_warn("failed to resolve size of extern '%s': %d\n", + ext_name, ext->sz); + return ext->sz; + } + ext->align = btf__align_of(obj->btf, t->type); + if (ext->align <= 0) { + pr_warn("failed to determine alignment of extern '%s': %d\n", + ext_name, ext->align); + return -EINVAL; + } + ext->type = find_extern_type(obj->btf, t->type, + &ext->is_signed); + if (ext->type == EXT_UNKNOWN) { + pr_warn("extern '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } + pr_debug("collected %d externs total\n", obj->nr_extern); + + if (!obj->nr_extern) + return 0; + + /* sort externs by (alignment, size, name) and calculate their offsets + * within a map */ + qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + ext->data_off = roundup(off, ext->align); + off = ext->data_off + ext->sz; + pr_debug("extern #%d: symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->data_off, ext->name); + } + + btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); + if (btf_id <= 0) { + pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); + return -ESRCH; + } + + sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + vs->offset = ext->data_off; + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + } + + return 0; } static struct bpf_program * @@ -1765,6 +2799,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, return NULL; } +struct bpf_program * +bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + if (!strcmp(prog->name, name)) + return prog; + } + return NULL; +} + static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { @@ -1789,6 +2836,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) return LIBBPF_MAP_BSS; else if (shndx == obj->efile.rodata_shndx) return LIBBPF_MAP_RODATA; + else if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; else return LIBBPF_MAP_UNSPEC; } @@ -1817,7 +2866,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } if (sym->st_value % 8) { - pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); + pr_warn("bad call relo offset: %zu\n", + (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -1832,6 +2882,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; } + + if (sym_is_extern(sym)) { + int sym_idx = GELF_R_SYM(rel->r_info); + int i, n = obj->nr_extern; + struct extern_desc *ext; + + for (i = 0; i < n; i++) { + ext = &obj->externs[i]; + if (ext->sym_idx == sym_idx) + break; + } + if (i >= n) { + pr_warn("extern relo failed to find extern for sym %d\n", + sym_idx); + return -LIBBPF_ERRNO__RELOC; + } + pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", + i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + reloc_desc->type = RELO_EXTERN; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = ext->data_off; + return 0; + } + if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", name, shdr_idx); @@ -1859,8 +2933,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %llu\n", - shdr_idx, (__u64)sym->st_value); + pr_warn("map relo failed to find map for sec %u, off %zu\n", + shdr_idx, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -1875,11 +2949,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_warn("bad data relo against section %u\n", shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (!obj->caps.global_data) { - pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); - return -LIBBPF_ERRNO__RELOC; - } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; if (map->libbpf_type != type) @@ -1941,9 +3010,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name) ? : "<?>"; - pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), - (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), + (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), GELF_ST_BIND(sym.st_info), sym.st_name, name, insn_idx); @@ -1961,8 +3030,12 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) __u32 key_type_id = 0, value_type_id = 0; int ret; - /* if it's BTF-defined map, we don't need to search for type IDs */ - if (map->sec_idx == obj->efile.btf_maps_shndx) + /* if it's BTF-defined map, we don't need to search for type IDs. + * For struct_ops map, it does not need btf_key_type_id and + * btf_value_type_id. + */ + if (map->sec_idx == obj->efile.btf_maps_shndx || + bpf_map__is_struct_ops(map)) return 0; if (!bpf_map__is_internal(map)) { @@ -2166,6 +3239,32 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) return 0; } +static int bpf_object__probe_btf_func_global(struct bpf_object *obj) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + int btf_fd; + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (btf_fd >= 0) { + obj->caps.btf_func_global = 1; + close(btf_fd); + return 1; + } + + return 0; +} + static int bpf_object__probe_btf_datasec(struct bpf_object *obj) { static const char strs[] = "\0x\0.data"; @@ -2221,6 +3320,7 @@ bpf_object__probe_caps(struct bpf_object *obj) bpf_object__probe_name, bpf_object__probe_global_data, bpf_object__probe_btf_func, + bpf_object__probe_btf_func_global, bpf_object__probe_btf_datasec, bpf_object__probe_array_mmap, }; @@ -2298,29 +3398,35 @@ bpf_object__reuse_map(struct bpf_map *map) static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { + enum libbpf_map_type map_type = map->libbpf_type; char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; - __u8 *data; - /* Nothing to do here since kernel already zero-initializes .bss map. */ - if (map->libbpf_type == LIBBPF_MAP_BSS) + /* kernel already zero-initializes .bss map. */ + if (map_type == LIBBPF_MAP_BSS) return 0; - data = map->libbpf_type == LIBBPF_MAP_DATA ? - obj->sections.data : obj->sections.rodata; + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); + if (err) { + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("Error setting initial map(%s) contents: %s\n", + map->name, cp); + return err; + } - err = bpf_map_update_elem(map->fd, &zero, data, 0); - /* Freeze .rodata map as read-only from syscall side. */ - if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + /* Freeze .rodata and .kconfig map as read-only from syscall side. */ + if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { err = bpf_map_freeze(map->fd); if (err) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("Error freezing map(%s) as read-only: %s\n", map->name, cp); - err = 0; + return err; } } - return err; + return 0; } static int @@ -2381,6 +3487,9 @@ bpf_object__create_maps(struct bpf_object *obj) if (bpf_map_type__is_map_in_map(def->type) && map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; + if (bpf_map__is_struct_ops(map)) + create_attr.btf_vmlinux_value_type_id = + map->btf_vmlinux_value_type_id; if (obj->btf && !bpf_map_find_btf_info(obj, map)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -2411,6 +3520,7 @@ err_out: cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("failed to create map (name: '%s'): %s(%d)\n", map->name, cp, err); + pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -2536,6 +3646,21 @@ static bool str_is_empty(const char *s) return !s || !s[0]; } +static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +{ + const struct btf_type *t; + + /* not a flexible array, if not inside a struct or has non-zero size */ + if (!acc->name || arr->nelems > 0) + return false; + + /* has to be the last member of enclosing struct */ + t = btf__type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; +} + /* * Turn bpf_field_reloc into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -2573,6 +3698,7 @@ static int bpf_core_spec_parse(const struct btf *btf, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; + struct bpf_core_accessor *acc; const struct btf_type *t; const char *name; __u32 id; @@ -2620,6 +3746,7 @@ static int bpf_core_spec_parse(const struct btf *btf, return -EINVAL; access_idx = spec->raw_spec[i]; + acc = &spec->spec[spec->len]; if (btf_is_composite(t)) { const struct btf_member *m; @@ -2637,18 +3764,23 @@ static int bpf_core_spec_parse(const struct btf *btf, if (str_is_empty(name)) return -EINVAL; - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->spec[spec->len].name = name; + acc->type_id = id; + acc->idx = access_idx; + acc->name = name; spec->len++; } id = m->type; } else if (btf_is_array(t)) { const struct btf_array *a = btf_array(t); + bool flex; t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t || access_idx >= a->nelems) + if (!t) + return -EINVAL; + + flex = is_flex_arr(btf, acc - 1, a); + if (!flex && access_idx >= a->nelems) return -EINVAL; spec->spec[spec->len].type_id = id; @@ -2743,7 +3875,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("[%d] %s: found candidate [%d] %s\n", local_type_id, local_name, i, targ_name); - new_ids = realloc(cand_ids->data, cand_ids->len + 1); + new_ids = reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; @@ -2953,12 +4087,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, */ if (i > 0) { const struct btf_array *a; + bool flex; if (!btf_is_array(targ_type)) return 0; a = btf_array(targ_type); - if (local_acc->idx >= a->nelems) + flex = is_flex_arr(targ_btf, targ_acc - 1, a); + if (!flex && local_acc->idx >= a->nelems) return 0; if (!skip_mods_and_typedefs(targ_btf, a->type, &targ_id)) @@ -3109,25 +4245,38 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, */ static int bpf_core_reloc_insn(struct bpf_program *prog, const struct bpf_field_reloc *relo, + int relo_idx, const struct bpf_core_spec *local_spec, const struct bpf_core_spec *targ_spec) { - bool failed = false, validate = true; __u32 orig_val, new_val; struct bpf_insn *insn; + bool validate = true; int insn_idx, err; __u8 class; if (relo->insn_off % sizeof(struct bpf_insn)) return -EINVAL; insn_idx = relo->insn_off / sizeof(struct bpf_insn); + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); if (relo->kind == BPF_FIELD_EXISTS) { orig_val = 1; /* can't generate EXISTS relo w/o local field */ new_val = targ_spec ? 1 : 0; } else if (!targ_spec) { - failed = true; - new_val = (__u32)-1; + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ + return 0; } else { err = bpf_core_calc_field_relo(prog, relo, local_spec, &orig_val, &validate); @@ -3139,26 +4288,47 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, return err; } - insn = &prog->insns[insn_idx]; - class = BPF_CLASS(insn->code); - - if (class == BPF_ALU || class == BPF_ALU64) { + switch (class) { + case BPF_ALU: + case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (!failed && validate && insn->imm != orig_val) { - pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - insn->imm, orig_val, new_val); + if (validate && insn->imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->imm, orig_val, new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; - pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - failed ? " w/ failed reloc" : "", orig_val, new_val); - } else { - pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); + break; + case BPF_LDX: + case BPF_ST: + case BPF_STX: + if (validate && insn->off != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->off, orig_val, new_val); + return -EINVAL; + } + if (new_val > SHRT_MAX) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, new_val); + return -ERANGE; + } + orig_val = insn->off; + insn->off = new_val; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); + break; + default: + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), relo_idx, insn_idx, insn->code, insn->src_reg, insn->dst_reg, insn->off, insn->imm); return -EINVAL; @@ -3167,92 +4337,6 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, return 0; } -static struct btf *btf_load_raw(const char *path) -{ - struct btf *btf; - size_t read_cnt; - struct stat st; - void *data; - FILE *f; - - if (stat(path, &st)) - return ERR_PTR(-errno); - - data = malloc(st.st_size); - if (!data) - return ERR_PTR(-ENOMEM); - - f = fopen(path, "rb"); - if (!f) { - btf = ERR_PTR(-errno); - goto cleanup; - } - - read_cnt = fread(data, 1, st.st_size, f); - fclose(f); - if (read_cnt < st.st_size) { - btf = ERR_PTR(-EBADF); - goto cleanup; - } - - btf = btf__new(data, read_cnt); - -cleanup: - free(data); - return btf; -} - -/* - * Probe few well-known locations for vmlinux kernel image and try to load BTF - * data out of it to use for target BTF. - */ -static struct btf *bpf_core_find_kernel_btf(void) -{ - struct { - const char *path_fmt; - bool raw_btf; - } locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, - /* fall back to trying to find vmlinux ELF on disk otherwise */ - { "/boot/vmlinux-%1$s" }, - { "/lib/modules/%1$s/vmlinux-%1$s" }, - { "/lib/modules/%1$s/build/vmlinux" }, - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, - { "/usr/lib/debug/boot/vmlinux-%1$s" }, - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, - }; - char path[PATH_MAX + 1]; - struct utsname buf; - struct btf *btf; - int i; - - uname(&buf); - - for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); - - if (access(path, R_OK)) - continue; - - if (locations[i].raw_btf) - btf = btf_load_raw(path); - else - btf = btf__parse_elf(path, NULL); - - pr_debug("loading kernel BTF '%s': %ld\n", - path, IS_ERR(btf) ? PTR_ERR(btf) : 0); - if (IS_ERR(btf)) - continue; - - return btf; - } - - pr_warn("failed to find valid kernel BTF\n"); - return ERR_PTR(-ESRCH); -} - /* Output spec definition in the format: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b @@ -3443,24 +4527,33 @@ static int bpf_core_reloc_field(struct bpf_program *prog, } /* - * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is - * requested, it's expected that we might not find any candidates. - * In this case, if field wasn't found in any candidate, the list of - * candidates shouldn't change at all, we'll just handle relocating - * appropriately, depending on relo's kind. + * For BPF_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't + * change at all, we'll just handle relocating appropriately, + * depending on relo's kind. */ if (j > 0) cand_ids->len = j; - if (j == 0 && !prog->obj->relaxed_core_relocs && - relo->kind != BPF_FIELD_EXISTS) { - pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); - return -ESRCH; - } + /* + * If no candidates were found, it might be both a programmer error, + * as well as expected case, depending whether instruction w/ + * relocation is guarded in some way that makes it unreachable (dead + * code) if relocation can't be resolved. This is handled in + * bpf_core_reloc_insn() uniformly by replacing that instruction with + * BPF helper call insn (using invalid helper ID). If that instruction + * is indeed unreachable, then it will be ignored and eliminated by + * verifier. If it was an error, then verifier will complain and point + * to a specific instruction number in its log. + */ + if (j == 0) + pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); /* bpf_core_reloc_insn should know how to handle missing targ_spec */ - err = bpf_core_reloc_insn(prog, relo, &local_spec, + err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, j ? &targ_spec : NULL); if (err) { pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", @@ -3487,7 +4580,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else - targ_btf = bpf_core_find_kernel_btf(); + targ_btf = libbpf_find_kernel_btf(); if (IS_ERR(targ_btf)) { pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); @@ -3559,16 +4652,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, size_t new_cnt; int err; - if (relo->type != RELO_CALL) - return -LIBBPF_ERRNO__RELOC; - - if (prog->idx == obj->efile.text_shndx) { - pr_warn("relo in .text insn %d into off %d (insn #%d)\n", - relo->insn_idx, relo->sym_off, relo->sym_off / 8); - return -LIBBPF_ERRNO__RELOC; - } - - if (prog->main_prog_cnt == 0) { + if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); if (!text) { pr_warn("no .text section found yet relo into text exist\n"); @@ -3598,6 +4682,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, text->insns_cnt, text->section_name, prog->section_name); } + insn = &prog->insns[relo->insn_idx]; insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; return 0; @@ -3623,27 +4708,37 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { - struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warn("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; - } + if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { + pr_warn("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } - if (relo->type != RELO_DATA) { - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - } else { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[1].imm = insn[0].imm + relo->sym_off; - } + switch (relo->type) { + case RELO_LD64: + insn[0].src_reg = BPF_PSEUDO_MAP_FD; + insn[0].imm = obj->maps[relo->map_idx].fd; + break; + case RELO_DATA: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[1].imm = insn[0].imm + relo->sym_off; insn[0].imm = obj->maps[relo->map_idx].fd; - } else if (relo->type == RELO_CALL) { + break; + case RELO_EXTERN: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = relo->sym_off; + break; + case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); if (err) return err; + break; + default: + pr_warn("relo #%d: bad relo type %d\n", i, relo->type); + return -EINVAL; } } @@ -3667,8 +4762,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } + /* ensure .text is relocated first, as it's going to be copied as-is + * later for sub-program calls + */ + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog->idx != obj->efile.text_shndx) + continue; + + err = bpf_program__relocate(prog, obj); + if (err) { + pr_warn("failed to relocate '%s'\n", prog->section_name); + return err; + } + break; + } + /* now relocate everything but .text, which by now is relocated + * properly, so we can copy raw sub-program instructions as is safely + */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; + if (prog->idx == obj->efile.text_shndx) + continue; err = bpf_program__relocate(prog, obj); if (err) { @@ -3679,6 +4794,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return 0; } +static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, + GElf_Shdr *shdr, + Elf_Data *data); + static int bpf_object__collect_reloc(struct bpf_object *obj) { int i, err; @@ -3699,6 +4818,15 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__INTERNAL; } + if (idx == obj->efile.st_ops_shndx) { + err = bpf_object__collect_struct_ops_map_reloc(obj, + shdr, + data); + if (err) + return err; + continue; + } + prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { pr_warn("relocation failed: no section(%d)\n", idx); @@ -3733,7 +4861,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; - if (prog->type == BPF_PROG_TYPE_TRACING) { + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { + load_attr.attach_btf_id = prog->attach_btf_id; + } else if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { load_attr.attach_prog_fd = prog->attach_prog_fd; load_attr.attach_btf_id = prog->attach_btf_id; } else { @@ -3778,6 +4909,7 @@ retry_load: ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); + pr_perm_msg(ret); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; @@ -3807,11 +4939,19 @@ out: return ret; } -int -bpf_program__load(struct bpf_program *prog, - char *license, __u32 kern_version) +static int libbpf_find_attach_btf_id(struct bpf_program *prog); + +int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { - int err = 0, fd, i; + int err = 0, fd, i, btf_id; + + if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { + btf_id = libbpf_find_attach_btf_id(prog); + if (btf_id <= 0) + return btf_id; + prog->attach_btf_id = btf_id; + } if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { @@ -3835,7 +4975,7 @@ bpf_program__load(struct bpf_program *prog, prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -3864,9 +5004,7 @@ bpf_program__load(struct bpf_program *prog, } err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, - license, kern_version, &fd); - + result.new_insn_cnt, license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of program '%s' failed\n", i, prog->section_name); @@ -3910,20 +5048,14 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd); static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { - const char *pin_root_path; + const char *obj_name, *kconfig; struct bpf_program *prog; struct bpf_object *obj; - const char *obj_name; char tmp_name[64]; - bool relaxed_maps; - __u32 attach_prog_fd; int err; if (elf_version(EV_CURRENT) == EV_NONE) { @@ -3951,23 +5083,32 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, if (IS_ERR(obj)) return obj; - obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - - CHECK_ERR(bpf_object__elf_init(obj), err, out); - CHECK_ERR(bpf_object__check_endianness(obj), err, out); - CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), - err, out); - CHECK_ERR(bpf_object__collect_reloc(obj), err, out); + kconfig = OPTS_GET(opts, kconfig, NULL); + if (kconfig) { + obj->kconfig = strdup(kconfig); + if (!obj->kconfig) + return ERR_PTR(-ENOMEM); + } + + err = bpf_object__elf_init(obj); + err = err ? : bpf_object__check_endianness(obj); + err = err ? : bpf_object__elf_collect(obj); + err = err ? : bpf_object__collect_externs(obj); + err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object__init_maps(obj, opts); + err = err ? : bpf_object__init_prog_names(obj); + err = err ? : bpf_object__collect_reloc(obj); + if (err) + goto out; bpf_object__elf_finish(obj); bpf_object__for_each_program(prog, obj) { enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; + if (prog->type != BPF_PROG_TYPE_UNSPEC) + continue; + err = libbpf_prog_type_by_name(prog->section_name, &prog_type, &attach_type); if (err == -ESRCH) @@ -3978,15 +5119,9 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_program__set_type(prog, prog_type); bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING) { - err = libbpf_find_attach_btf_id(prog->section_name, - attach_type, - attach_prog_fd); - if (err <= 0) - goto out; - prog->attach_btf_id = err; - prog->attach_prog_fd = attach_prog_fd; - } + if (prog_type == BPF_PROG_TYPE_TRACING || + prog_type == BPF_PROG_TYPE_EXT) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); } return obj; @@ -4026,7 +5161,7 @@ struct bpf_object *bpf_object__open(const char *path) } struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) return ERR_PTR(-EINVAL); @@ -4038,7 +5173,7 @@ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { if (!obj_buf || obj_buf_sz == 0) return ERR_PTR(-EINVAL); @@ -4070,8 +5205,11 @@ int bpf_object__unload(struct bpf_object *obj) if (!obj) return -EINVAL; - for (i = 0; i < obj->nr_maps; i++) + for (i = 0; i < obj->nr_maps; i++) { zclose(obj->maps[i].fd); + if (obj->maps[i].st_ops) + zfree(&obj->maps[i].st_ops->kern_vdata); + } for (i = 0; i < obj->nr_programs; i++) bpf_program__unload(&obj->programs[i]); @@ -4079,6 +5217,92 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +static int bpf_object__sanitize_maps(struct bpf_object *obj) +{ + struct bpf_map *m; + + bpf_object__for_each_map(m, obj) { + if (!bpf_map__is_internal(m)) + continue; + if (!obj->caps.global_data) { + pr_warn("kernel doesn't support global data\n"); + return -ENOTSUP; + } + if (!obj->caps.array_mmap) + m->def.map_flags ^= BPF_F_MMAPABLE; + } + + return 0; +} + +static int bpf_object__resolve_externs(struct bpf_object *obj, + const char *extra_kconfig) +{ + bool need_config = false; + struct extern_desc *ext; + int err, i; + void *data; + + if (obj->nr_extern == 0) + return 0; + + data = obj->maps[obj->kconfig_map_idx].mmaped; + + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = data + ext->data_off; + __u32 kver = get_kernel_version(); + + if (!kver) { + pr_warn("failed to get kernel version\n"); + return -EINVAL; + } + err = set_ext_value_num(ext, ext_val, kver); + if (err) + return err; + pr_debug("extern %s=0x%x\n", ext->name, kver); + } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + need_config = true; + } else { + pr_warn("unrecognized extern '%s'\n", ext->name); + return -EINVAL; + } + } + if (need_config && extra_kconfig) { + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + if (err) + return -EINVAL; + need_config = false; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (!ext->is_set) { + need_config = true; + break; + } + } + } + if (need_config) { + err = bpf_object__read_kconfig_file(obj, data); + if (err) + return -EINVAL; + } + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (!ext->is_set && !ext->is_weak) { + pr_warn("extern %s (strong) not resolved\n", ext->name); + return -ESRCH; + } else if (!ext->is_set) { + pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + ext->name); + } + } + + return 0; +} + int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { struct bpf_object *obj; @@ -4097,9 +5321,21 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) obj->loaded = true; - CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); - CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); + err = bpf_object__probe_caps(obj); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object__relocate(obj, attr->target_btf_path); + err = err ? : bpf_object__load_progs(obj, attr->log_level); + + btf__free(obj->btf_vmlinux); + obj->btf_vmlinux = NULL; + + if (err) + goto out; return 0; out: @@ -4670,17 +5906,33 @@ void bpf_object__close(struct bpf_object *obj) btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { - zfree(&obj->maps[i].name); - zfree(&obj->maps[i].pin_path); - if (obj->maps[i].clear_priv) - obj->maps[i].clear_priv(&obj->maps[i], - obj->maps[i].priv); - obj->maps[i].priv = NULL; - obj->maps[i].clear_priv = NULL; + struct bpf_map *map = &obj->maps[i]; + + if (map->clear_priv) + map->clear_priv(map, map->priv); + map->priv = NULL; + map->clear_priv = NULL; + + if (map->mmaped) { + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; + } + + if (map->st_ops) { + zfree(&map->st_ops->data); + zfree(&map->st_ops->progs); + zfree(&map->st_ops->kern_func_off); + zfree(&map->st_ops); + } + + zfree(&map->name); + zfree(&map->pin_path); } - zfree(&obj->sections.rodata); - zfree(&obj->sections.data); + zfree(&obj->kconfig); + zfree(&obj->externs); + obj->nr_extern = 0; + zfree(&obj->maps); obj->nr_maps = 0; @@ -4820,6 +6072,11 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) prog->prog_ifindex = ifindex; } +const char *bpf_program__name(const struct bpf_program *prog) +{ + return prog->name; +} + const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) { const char *title; @@ -4936,6 +6193,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); +BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); +BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); enum bpf_attach_type bpf_program__get_expected_attach_type(struct bpf_program *prog) @@ -4972,7 +6231,28 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, */ #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) -static const struct { +#define SEC_DEF(sec_pfx, ptype, ...) { \ + .sec = sec_pfx, \ + .len = sizeof(sec_pfx) - 1, \ + .prog_type = BPF_PROG_TYPE_##ptype, \ + __VA_ARGS__ \ +} + +struct bpf_sec_def; + +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +struct bpf_sec_def { const char *sec; size_t len; enum bpf_prog_type prog_type; @@ -4980,24 +6260,43 @@ static const struct { bool is_attachable; bool is_attach_btf; enum bpf_attach_type attach_type; -} section_names[] = { + attach_fn_t attach_fn; +}; + +static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), + SEC_DEF("kprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + SEC_DEF("kretprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_RAW_TP), - BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FENTRY), - BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FEXIT), + SEC_DEF("tracepoint/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("tp/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fentry/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fexit/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("freplace/", EXT, + .is_attach_btf = true, + .attach_fn = attach_trace), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -5052,6 +6351,7 @@ static const struct { BPF_CGROUP_GETSOCKOPT), BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT), + BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), }; #undef BPF_PROG_SEC_IMPL @@ -5059,12 +6359,26 @@ static const struct { #undef BPF_APROG_SEC #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#undef SEC_DEF #define MAX_TYPE_NAME_SIZE 32 +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + int i, n = ARRAY_SIZE(section_defs); + + for (i = 0; i < n; i++) { + if (strncmp(sec_name, + section_defs[i].sec, section_defs[i].len)) + continue; + return §ion_defs[i]; + } + return NULL; +} + static char *libbpf_get_type_names(bool attach_type) { - int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; char *buf; buf = malloc(len); @@ -5073,16 +6387,16 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (attach_type && !section_names[i].is_attachable) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (attach_type && !section_defs[i].is_attachable) continue; - if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); return NULL; } strcat(buf, " "); - strcat(buf, section_names[i].sec); + strcat(buf, section_defs[i].sec); } return buf; @@ -5091,57 +6405,205 @@ static char *libbpf_get_type_names(bool attach_type) int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + const struct bpf_sec_def *sec_def; char *type_names; - int i; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - *prog_type = section_names[i].prog_type; - *expected_attach_type = section_names[i].expected_attach_type; + sec_def = find_sec_def(name); + if (sec_def) { + *prog_type = sec_def->prog_type; + *expected_attach_type = sec_def->expected_attach_type; return 0; } - pr_warn("failed to guess program type from ELF section '%s'\n", name); + + pr_debug("failed to guess program type from ELF section '%s'\n", name); type_names = libbpf_get_type_names(false); if (type_names != NULL) { - pr_info("supported section(type) names are:%s\n", type_names); + pr_debug("supported section(type) names are:%s\n", type_names); free(type_names); } return -ESRCH; } -#define BTF_PREFIX "btf_trace_" +static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, + size_t offset) +{ + struct bpf_map *map; + size_t i; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + if (!bpf_map__is_struct_ops(map)) + continue; + if (map->sec_offset <= offset && + offset - map->sec_offset < map->def.value_size) + return map; + } + + return NULL; +} + +/* Collect the reloc from ELF and populate the st_ops->progs[] */ +static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, + GElf_Shdr *shdr, + Elf_Data *data) +{ + const struct btf_member *member; + struct bpf_struct_ops *st_ops; + struct bpf_program *prog; + unsigned int shdr_idx; + const struct btf *btf; + struct bpf_map *map; + Elf_Data *symbols; + unsigned int moff; + const char *name; + __u32 member_idx; + GElf_Sym sym; + GElf_Rel rel; + int i, nrels; + + symbols = obj->efile.symbols; + btf = obj->btf; + nrels = shdr->sh_size / shdr->sh_entsize; + for (i = 0; i < nrels; i++) { + if (!gelf_getrel(data, i, &rel)) { + pr_warn("struct_ops reloc: failed to get %d reloc\n", i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + pr_warn("struct_ops reloc: symbol %zx not found\n", + (size_t)GELF_R_SYM(rel.r_info)); + return -LIBBPF_ERRNO__FORMAT; + } + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : "<?>"; + map = find_struct_ops_map_by_offset(obj, rel.r_offset); + if (!map) { + pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", + (size_t)rel.r_offset); + return -EINVAL; + } + + moff = rel.r_offset - map->sec_offset; + shdr_idx = sym.st_shndx; + st_ops = map->st_ops; + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + map->name, + (long long)(rel.r_info >> 32), + (long long)sym.st_value, + shdr_idx, (size_t)rel.r_offset, + map->sec_offset, sym.st_name, name); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel.r_offset, shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + member = find_member_by_offset(st_ops->type, moff * 8); + if (!member) { + pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", + map->name, moff); + return -EINVAL; + } + member_idx = member - btf_members(st_ops->type); + name = btf__name_by_offset(btf, member->name_off); + + if (!resolve_func_ptr(btf, member->type, NULL)) { + pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", + map->name, name); + return -EINVAL; + } + + prog = bpf_object__find_prog_by_idx(obj, shdr_idx); + if (!prog) { + pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", + map->name, shdr_idx, name); + return -EINVAL; + } + + if (prog->type == BPF_PROG_TYPE_UNSPEC) { + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(prog->section_name); + if (sec_def && + sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { + /* for pr_warn */ + prog->type = sec_def->prog_type; + goto invalid_prog; + } + + prog->type = BPF_PROG_TYPE_STRUCT_OPS; + prog->attach_btf_id = st_ops->type_id; + prog->expected_attach_type = member_idx; + } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || + prog->attach_btf_id != st_ops->type_id || + prog->expected_attach_type != member_idx) { + goto invalid_prog; + } + st_ops->progs[member_idx] = prog; + } + + return 0; + +invalid_prog: + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", + map->name, prog->name, prog->section_name, prog->type, + prog->attach_btf_id, prog->expected_attach_type, name); + return -EINVAL; +} + +#define BTF_TRACE_PREFIX "btf_trace_" +#define BTF_MAX_NAME_SIZE 128 + +static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, + const char *name, __u32 kind) +{ + char btf_type_name[BTF_MAX_NAME_SIZE]; + int ret; + + ret = snprintf(btf_type_name, sizeof(btf_type_name), + "%s%s", prefix, name); + /* snprintf returns the number of characters written excluding the + * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it + * indicates truncation. + */ + if (ret < 0 || ret >= sizeof(btf_type_name)) + return -ENAMETOOLONG; + return btf__find_by_name_kind(btf, btf_type_name, kind); +} + +static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, + enum bpf_attach_type attach_type) +{ + int err; + + if (attach_type == BPF_TRACE_RAW_TP) + err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, + BTF_KIND_TYPEDEF); + else + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + + return err; +} + int libbpf_find_vmlinux_btf_id(const char *name, enum bpf_attach_type attach_type) { - struct btf *btf = bpf_core_find_kernel_btf(); - char raw_tp_btf[128] = BTF_PREFIX; - char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; - const char *btf_name; - int err = -EINVAL; - __u32 kind; + struct btf *btf; + btf = libbpf_find_kernel_btf(); if (IS_ERR(btf)) { pr_warn("vmlinux BTF is not found\n"); return -EINVAL; } - if (attach_type == BPF_TRACE_RAW_TP) { - /* prepend "btf_trace_" prefix per kernel convention */ - strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); - btf_name = raw_tp_btf; - kind = BTF_KIND_TYPEDEF; - } else { - btf_name = name; - kind = BTF_KIND_FUNC; - } - err = btf__find_by_name_kind(btf, btf_name, kind); - btf__free(btf); - return err; + return __find_vmlinux_btf_id(btf, name, attach_type); } static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) @@ -5177,26 +6639,28 @@ out: return err; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd) +static int libbpf_find_attach_btf_id(struct bpf_program *prog) { + enum bpf_attach_type attach_type = prog->expected_attach_type; + __u32 attach_prog_fd = prog->attach_prog_fd; + const char *name = prog->section_name; int i, err; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (!section_names[i].is_attach_btf) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (!section_defs[i].is_attach_btf) continue; - if (strncmp(name, section_names[i].sec, section_names[i].len)) + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; if (attach_prog_fd) - err = libbpf_find_prog_btf_id(name + section_names[i].len, + err = libbpf_find_prog_btf_id(name + section_defs[i].len, attach_prog_fd); else - err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, - attach_type); + err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, + name + section_defs[i].len, + attach_type); if (err <= 0) pr_warn("%s is not found in vmlinux BTF\n", name); return err; @@ -5214,18 +6678,18 @@ int libbpf_attach_type_by_name(const char *name, if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; - if (!section_names[i].is_attachable) + if (!section_defs[i].is_attachable) return -EINVAL; - *attach_type = section_names[i].attach_type; + *attach_type = section_defs[i].attach_type; return 0; } - pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(true); if (type_names != NULL) { - pr_info("attachable section(type) names are:%s\n", type_names); + pr_debug("attachable section(type) names are:%s\n", type_names); free(type_names); } @@ -5466,17 +6930,37 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } struct bpf_link { + int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + bool disconnected; }; +/* Release "ownership" of underlying BPF resource (typically, BPF program + * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected + * link, when destructed through bpf_link__destroy() call won't attempt to + * detach/unregisted that BPF resource. This is useful in situations where, + * say, attached BPF program has to outlive userspace program that attached it + * in the system. Depending on type of BPF program, though, there might be + * additional steps (like pinning BPF program in BPF FS) necessary to ensure + * exit of userspace program doesn't trigger automatic detachment and clean up + * inside the kernel. + */ +void bpf_link__disconnect(struct bpf_link *link) +{ + link->disconnected = true; +} + int bpf_link__destroy(struct bpf_link *link) { - int err; + int err = 0; if (!link) return 0; - err = link->destroy(link); + if (!link->disconnected && link->detach) + err = link->detach(link); + if (link->destroy) + link->destroy(link); free(link); return err; @@ -5487,7 +6971,7 @@ struct bpf_link_fd { int fd; /* hook FD */ }; -static int bpf_link__destroy_perf_event(struct bpf_link *link) +static int bpf_link__detach_perf_event(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; int err; @@ -5519,10 +7003,10 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_perf_event; + link->link.detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -5679,6 +7163,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return link; } +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *func_name; + bool retprobe; + + func_name = bpf_program__title(prog, false) + sec->len; + retprobe = strcmp(sec->sec, "kretprobe/") == 0; + + return bpf_program__attach_kprobe(prog, retprobe, func_name); +} + struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, @@ -5791,7 +7287,33 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, return link; } -static int bpf_link__destroy_fd(struct bpf_link *link) +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + char *sec_name, *tp_cat, *tp_name; + struct bpf_link *link; + + sec_name = strdup(bpf_program__title(prog, false)); + if (!sec_name) + return ERR_PTR(-ENOMEM); + + /* extract "tp/<category>/<name>" */ + tp_cat = sec_name + sec->len; + tp_name = strchr(tp_cat, '/'); + if (!tp_name) { + link = ERR_PTR(-EINVAL); + goto out; + } + *tp_name = '\0'; + tp_name++; + + link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); +out: + free(sec_name); + return link; +} + +static int bpf_link__detach_fd(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; @@ -5812,10 +7334,10 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -5830,6 +7352,14 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return (struct bpf_link *)link; } +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *tp_name = bpf_program__title(prog, false) + sec->len; + + return bpf_program__attach_raw_tracepoint(prog, tp_name); +} + struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; @@ -5843,10 +7373,10 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { @@ -5861,6 +7391,75 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return (struct bpf_link *)link; } +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_trace(prog); +} + +struct bpf_link *bpf_program__attach(struct bpf_program *prog) +{ + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(bpf_program__title(prog, false)); + if (!sec_def || !sec_def->attach_fn) + return ERR_PTR(-ESRCH); + + return sec_def->attach_fn(sec_def, prog); +} + +static int bpf_link__detach_struct_ops(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + __u32 zero = 0; + + if (bpf_map_delete_elem(l->fd, &zero)) + return -errno; + + return 0; +} + +struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) +{ + struct bpf_struct_ops *st_ops; + struct bpf_link_fd *link; + __u32 i, zero = 0; + int err; + + if (!bpf_map__is_struct_ops(map) || map->fd == -1) + return ERR_PTR(-EINVAL); + + link = calloc(1, sizeof(*link)); + if (!link) + return ERR_PTR(-EINVAL); + + st_ops = map->st_ops; + for (i = 0; i < btf_vlen(st_ops->type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; + + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; + } + + err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); + if (err) { + err = -errno; + free(link); + return ERR_PTR(err); + } + + link->link.detach = bpf_link__detach_struct_ops; + link->fd = map->fd; + + return (struct bpf_link *)link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -5944,7 +7543,7 @@ struct perf_buffer { size_t mmap_size; struct perf_cpu_buf **cpu_bufs; struct epoll_event *events; - int cpu_cnt; + int cpu_cnt; /* number of allocated CPU buffers */ int epoll_fd; /* perf event FD */ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ }; @@ -6078,11 +7677,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { + const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map = {}; char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; + bool *online = NULL; __u32 map_info_len; - int err, i; + int err, i, j, n; if (page_cnt & (page_cnt - 1)) { pr_warn("page count should be power of two, but is %zu\n", @@ -6151,20 +7752,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - for (i = 0; i < pb->cpu_cnt; i++) { + err = parse_cpu_mask_file(online_cpus_file, &online, &n); + if (err) { + pr_warn("failed to get online CPU mask: %d\n", err); + goto error; + } + + for (i = 0, j = 0; i < pb->cpu_cnt; i++) { struct perf_cpu_buf *cpu_buf; int cpu, map_key; cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i; + /* in case user didn't explicitly requested particular CPUs to + * be attached to, skip offline/not present CPUs + */ + if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) + continue; + cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); if (IS_ERR(cpu_buf)) { err = PTR_ERR(cpu_buf); goto error; } - pb->cpu_bufs[i] = cpu_buf; + pb->cpu_bufs[j] = cpu_buf; err = bpf_map_update_elem(pb->map_fd, &map_key, &cpu_buf->fd, 0); @@ -6176,21 +7789,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - pb->events[i].events = EPOLLIN; - pb->events[i].data.ptr = cpu_buf; + pb->events[j].events = EPOLLIN; + pb->events[j].data.ptr = cpu_buf; if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, - &pb->events[i]) < 0) { + &pb->events[j]) < 0) { err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } + j++; } + pb->cpu_cnt = j; + free(online); return pb; error: + free(online); if (pb) perf_buffer__free(pb); return ERR_PTR(err); @@ -6521,62 +8138,267 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } -int libbpf_num_possible_cpus(void) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { - static const char *fcpu = "/sys/devices/system/cpu/possible"; - int len = 0, n = 0, il = 0, ir = 0; - unsigned int start = 0, end = 0; - int tmp_cpus = 0; - static int cpus; - char buf[128]; - int error = 0; - int fd = -1; + int err = 0, n, len, start, end = -1; + bool *tmp; - tmp_cpus = READ_ONCE(cpus); - if (tmp_cpus > 0) - return tmp_cpus; + *mask = NULL; + *mask_sz = 0; + + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ + while (*s) { + if (*s == ',' || *s == '\n') { + s++; + continue; + } + n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); + if (n <= 0 || n > 2) { + pr_warn("Failed to get CPU range %s: %d\n", s, n); + err = -EINVAL; + goto cleanup; + } else if (n == 1) { + end = start; + } + if (start < 0 || start > end) { + pr_warn("Invalid CPU range [%d,%d] in %s\n", + start, end, s); + err = -EINVAL; + goto cleanup; + } + tmp = realloc(*mask, end + 1); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + *mask = tmp; + memset(tmp + *mask_sz, 0, start - *mask_sz); + memset(tmp + start, 1, end - start + 1); + *mask_sz = end + 1; + s += len; + } + if (!*mask_sz) { + pr_warn("Empty CPU range\n"); + return -EINVAL; + } + return 0; +cleanup: + free(*mask); + *mask = NULL; + return err; +} + +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) +{ + int fd, err = 0, len; + char buf[128]; fd = open(fcpu, O_RDONLY); if (fd < 0) { - error = errno; - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); - return -error; + err = -errno; + pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { - error = len ? errno : EINVAL; - pr_warn("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); - return -error; + err = len ? -errno : -EINVAL; + pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + return err; } - if (len == sizeof(buf)) { - pr_warn("File %s size overflow\n", fcpu); - return -EOVERFLOW; + if (len >= sizeof(buf)) { + pr_warn("CPU mask is too big in file %s\n", fcpu); + return -E2BIG; } buf[len] = '\0'; - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ - if (buf[ir] == ',' || buf[ir] == '\0') { - buf[ir] = '\0'; - n = sscanf(&buf[il], "%u-%u", &start, &end); - if (n <= 0) { - pr_warn("Failed to get # CPUs from %s\n", - &buf[il]); - return -EINVAL; - } else if (n == 1) { - end = start; - } - tmp_cpus += end - start + 1; - il = ir + 1; - } - } - if (tmp_cpus <= 0) { - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); - return -EINVAL; + return parse_cpu_mask_str(buf, mask, mask_sz); +} + +int libbpf_num_possible_cpus(void) +{ + static const char *fcpu = "/sys/devices/system/cpu/possible"; + static int cpus; + int err, n, i, tmp_cpus; + bool *mask; + + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; + + err = parse_cpu_mask_file(fcpu, &mask, &n); + if (err) + return err; + + tmp_cpus = 0; + for (i = 0; i < n; i++) { + if (mask[i]) + tmp_cpus++; } + free(mask); WRITE_ONCE(cpus, tmp_cpus); return tmp_cpus; } + +int bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, + .object_name = s->name, + ); + struct bpf_object *obj; + int i; + + /* Attempt to preserve opts->object_name, unless overriden by user + * explicitly. Overwriting object name for skeletons is discouraged, + * as it breaks global data maps, because they contain object name + * prefix as their own map name prefix. When skeleton is generated, + * bpftool is making an assumption that this name will stay the same. + */ + if (opts) { + memcpy(&skel_opts, opts, sizeof(*opts)); + if (!opts->object_name) + skel_opts.object_name = s->name; + } + + obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); + if (IS_ERR(obj)) { + pr_warn("failed to initialize skeleton BPF object '%s': %ld\n", + s->name, PTR_ERR(obj)); + return PTR_ERR(obj); + } + + *s->obj = obj; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map **map = s->maps[i].map; + const char *name = s->maps[i].name; + void **mmaped = s->maps[i].mmaped; + + *map = bpf_object__find_map_by_name(obj, name); + if (!*map) { + pr_warn("failed to find skeleton map '%s'\n", name); + return -ESRCH; + } + + /* externs shouldn't be pre-setup from user code */ + if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) + *mmaped = (*map)->mmaped; + } + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program **prog = s->progs[i].prog; + const char *name = s->progs[i].name; + + *prog = bpf_object__find_program_by_name(obj, name); + if (!*prog) { + pr_warn("failed to find skeleton program '%s'\n", name); + return -ESRCH; + } + } + + return 0; +} + +int bpf_object__load_skeleton(struct bpf_object_skeleton *s) +{ + int i, err; + + err = bpf_object__load(*s->obj); + if (err) { + pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + return err; + } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map *map = *s->maps[i].map; + size_t mmap_sz = bpf_map_mmap_sz(map); + int prot, map_fd = bpf_map__fd(map); + void **mmaped = s->maps[i].mmaped; + + if (!mmaped) + continue; + + if (!(map->def.map_flags & BPF_F_MMAPABLE)) { + *mmaped = NULL; + continue; + } + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure, so we don't need to worry + * about it from skeleton's clean up perspective. + */ + *mmaped = mmap(map->mmaped, mmap_sz, prot, + MAP_SHARED | MAP_FIXED, map_fd, 0); + if (*mmaped == MAP_FAILED) { + err = -errno; + *mmaped = NULL; + pr_warn("failed to re-mmap() map '%s': %d\n", + bpf_map__name(map), err); + return err; + } + } + + return 0; +} + +int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program *prog = *s->progs[i].prog; + struct bpf_link **link = s->progs[i].link; + const struct bpf_sec_def *sec_def; + const char *sec_name = bpf_program__title(prog, false); + + sec_def = find_sec_def(sec_name); + if (!sec_def || !sec_def->attach_fn) + continue; + + *link = sec_def->attach_fn(sec_def, prog); + if (IS_ERR(*link)) { + pr_warn("failed to auto-attach program '%s': %ld\n", + bpf_program__name(prog), PTR_ERR(*link)); + return PTR_ERR(*link); + } + } + + return 0; +} + +void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_link **link = s->progs[i].link; + + if (!IS_ERR_OR_NULL(*link)) + bpf_link__destroy(*link); + *link = NULL; + } +} + +void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) +{ + if (s->progs) + bpf_object__detach_skeleton(s); + if (s->obj) + bpf_object__close(*s->obj); + free(s->maps); + free(s->progs); + free(s); +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0dbf4bfba0c4..3fe12c9d1f92 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -17,14 +17,12 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -67,28 +65,6 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; -/* Helper macro to declare and initialize libbpf options struct - * - * This dance with uninitialized declaration, followed by memset to zero, - * followed by assignment using compound literal syntax is done to preserve - * ability to use a nice struct field initialization syntax and **hopefully** - * have all the padding bytes initialized to zero. It's not guaranteed though, - * when copying literal, that compiler won't copy garbage in literal's padding - * bytes, but that's the best way I've found and it seems to work in practice. - * - * Macro declares opts struct of given type and name, zero-initializes, - * including any extra padding, it with memset() and then assigns initial - * values provided by users in struct initializer-syntax as varargs. - */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ - struct TYPE NAME = ({ \ - memset(&NAME, 0, sizeof(struct TYPE)); \ - (struct TYPE) { \ - .sz = sizeof(struct TYPE), \ - __VA_ARGS__ \ - }; \ - }) - struct bpf_object_open_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -101,7 +77,11 @@ struct bpf_object_open_opts { const char *object_name; /* parse map definitions non-strictly, allowing extra attributes/data */ bool relaxed_maps; - /* process CO-RE relocations non-strictly, allowing them to fail */ + /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures. + * Value is ignored. Relocations always are processed non-strictly. + * Non-relocatable instructions are replaced with invalid ones to + * prevent accidental errors. + * */ bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be @@ -109,15 +89,19 @@ struct bpf_object_open_opts { */ const char *pin_root_path; __u32 attach_prog_fd; + /* Additional kernel config content that augments and overrides + * system Kconfig for CONFIG_xxx externs. + */ + const char *kconfig; }; -#define bpf_object_open_opts__last_field attach_prog_fd +#define bpf_object_open_opts__last_field kconfig LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts); + const struct bpf_object_open_opts *opts); /* deprecated bpf_object__open variants */ LIBBPF_API struct bpf_object * @@ -126,11 +110,6 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); - enum libbpf_pin_type { LIBBPF_PIN_NONE, /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ @@ -161,6 +140,7 @@ struct bpf_object_load_attr { LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); + LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); @@ -171,6 +151,9 @@ LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title); +LIBBPF_API struct bpf_program * +bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name); LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ @@ -214,6 +197,7 @@ LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); +LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); @@ -235,9 +219,12 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_link; +LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * +bpf_program__attach(struct bpf_program *prog); +LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, @@ -256,6 +243,8 @@ bpf_program__attach_raw_tracepoint(struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_trace(struct bpf_program *prog); +struct bpf_map; +LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_insn; /* @@ -332,6 +321,8 @@ LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, @@ -352,6 +343,8 @@ LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); /* * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -371,7 +364,6 @@ struct bpf_map_def { * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, * so no need to worry about a name clash. */ -struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); @@ -512,18 +504,6 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, bpf_perf_event_print_t fn, void *private_data); -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); - struct bpf_prog_linfo; struct bpf_prog_info; @@ -550,6 +530,7 @@ LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); /* * Get bpf_prog_info in continuous memory @@ -630,6 +611,50 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); */ LIBBPF_API int libbpf_num_possible_cpus(void); +struct bpf_map_skeleton { + const char *name; + struct bpf_map **map; + void **mmaped; +}; + +struct bpf_prog_skeleton { + const char *name; + struct bpf_program **prog; + struct bpf_link **link; +}; + +struct bpf_object_skeleton { + size_t sz; /* size of this struct, for forward/backward compatibility */ + + const char *name; + void *data; + size_t data_sz; + + struct bpf_object **obj; + + int map_cnt; + int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ + struct bpf_map_skeleton *maps; + + int prog_cnt; + int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ + struct bpf_prog_skeleton *progs; +}; + +LIBBPF_API int +bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts); +LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); + +enum libbpf_tristate { + TRI_NO = 0, + TRI_YES = 1, + TRI_MODULE = 2, +}; + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 8ddc2c40e482..b035122142bb 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -208,3 +208,30 @@ LIBBPF_0.0.6 { btf__find_by_name_kind; libbpf_find_vmlinux_btf_id; } LIBBPF_0.0.5; + +LIBBPF_0.0.7 { + global: + btf_dump__emit_type_decl; + bpf_link__disconnect; + bpf_map__attach_struct_ops; + bpf_map_delete_batch; + bpf_map_lookup_and_delete_batch; + bpf_map_lookup_batch; + bpf_map_update_batch; + bpf_object__find_program_by_name; + bpf_object__attach_skeleton; + bpf_object__destroy_skeleton; + bpf_object__detach_skeleton; + bpf_object__load_skeleton; + bpf_object__open_skeleton; + bpf_probe_large_insn_limit; + bpf_prog_attach_xattr; + bpf_program__attach; + bpf_program__name; + bpf_program__is_extension; + bpf_program__is_struct_ops; + bpf_program__set_extension; + bpf_program__set_struct_ops; + btf__align_of; + libbpf_find_kernel_btf; +} LIBBPF_0.0.6; diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template index ac17fcef2108..b45ed534bdfb 100644 --- a/tools/lib/bpf/libbpf.pc.template +++ b/tools/lib/bpf/libbpf.pc.template @@ -8,5 +8,5 @@ Name: libbpf Description: BPF library Version: @VERSION@ Libs: -L${libdir} -lbpf -Requires.private: libelf +Requires.private: libelf zlib Cflags: -I${includedir} diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h new file mode 100644 index 000000000000..a23ae1ac27eb --- /dev/null +++ b/tools/lib/bpf/libbpf_common.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Common user-facing libbpf helpers. + * + * Copyright (c) 2019 Facebook + */ + +#ifndef __LIBBPF_LIBBPF_COMMON_H +#define __LIBBPF_LIBBPF_COMMON_H + +#include <string.h> + +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. + * + * Macro declares opts struct of given type and name, zero-initializes, + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. + */ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + }; \ + }) + +#endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index 4343e40588c6..0afb51f7a919 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -13,6 +13,9 @@ #include "libbpf.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) #define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 97ac17a64a58..8c3afbd97747 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -76,7 +76,7 @@ static inline bool libbpf_validate_opts(const char *opts, for (i = opts_sz; i < user_sz; i++) { if (opts[i]) { - pr_warn("%s has non-zero extra bytes", + pr_warn("%s has non-zero extra bytes\n", type_name); return false; } @@ -95,9 +95,28 @@ static inline bool libbpf_validate_opts(const char *opts, #define OPTS_GET(opts, field, fallback_value) \ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); + +struct nlattr; +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +int libbpf_netlink_open(unsigned int *nl_pid); +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + struct btf_ext_info { /* * info points to the individual info section (e.g. func_info and diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index a9eb8b322671..b782ebef6ac9 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,6 +17,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static bool grep(const char *buffer, const char *pattern) { return !!strstr(buffer, pattern); @@ -103,6 +106,8 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_EXT: default: break; } @@ -251,6 +256,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STRUCT_OPS: default: break; } @@ -321,3 +327,24 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, return res; } + +/* + * Probe for availability of kernel commit (5.3): + * + * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") + */ +bool bpf_probe_large_insn_limit(__u32 ifindex) +{ + struct bpf_insn insns[BPF_MAXINSNS + 1]; + int i; + + for (i = 0; i < BPF_MAXINSNS; i++) + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + + errno = 0; + probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, + ifindex); + + return errno != E2BIG && errno != EINVAL; +} diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 5065c1aa1061..431bd25c6cdb 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -15,6 +15,9 @@ #include "libbpf_internal.h" #include "nlattr.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef SOL_NETLINK #define SOL_NETLINK 270 #endif diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 8db44bbfc66d..0ad41dfea8eb 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -13,6 +13,9 @@ #include <string.h> #include <stdio.h> +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { [LIBBPF_NLA_U8] = sizeof(uint8_t), [LIBBPF_NLA_U16] = sizeof(uint16_t), diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index b8064eedc177..146da01979c7 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -4,6 +4,9 @@ #include <stdio.h> #include "str_error.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl * libc, while checking strerror_r() return to avoid having to check this in diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 8e0ffa800a71..9807903f121e 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -32,6 +32,9 @@ #include "libbpf_internal.h" #include "xsk.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef SOL_XDP #define SOL_XDP 283 #endif diff --git a/tools/perf/lib/Build b/tools/lib/perf/Build index 2ef9a4ec6d99..2ef9a4ec6d99 100644 --- a/tools/perf/lib/Build +++ b/tools/lib/perf/Build diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile new file mode 100644 index 000000000000..972754082a85 --- /dev/null +++ b/tools/lib/perf/Documentation/Makefile @@ -0,0 +1,156 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/perf/Documentation/Makefile + +include ../../../scripts/Makefile.include +include ../../../scripts/utilities.mak + +MAN3_TXT = libperf.txt +MAN7_TXT = libperf-counting.txt libperf-sampling.txt +MAN_EX = examples/*.c + +MAN_TXT = $(MAN3_TXT) $(MAN7_TXT) + +_MAN_XML = $(patsubst %.txt,%.xml,$(MAN_TXT)) +_MAN_HTML = $(patsubst %.txt,%.html,$(MAN_TXT)) +_MAN_3 = $(patsubst %.txt,%.3,$(MAN3_TXT)) +_MAN_7 = $(patsubst %.txt,%.7,$(MAN7_TXT)) + +MAN_XML = $(addprefix $(OUTPUT),$(_MAN_XML)) +MAN_HTML = $(addprefix $(OUTPUT),$(_MAN_HTML)) +MAN_3 = $(addprefix $(OUTPUT),$(_MAN_3)) +MAN_7 = $(addprefix $(OUTPUT),$(_MAN_7)) +MAN_X = $(MAN_3) $(MAN_7) + +# Make the path relative to DESTDIR, not prefix +ifndef DESTDIR + prefix ?=$(HOME) +endif + +mandir ?= $(prefix)/share/man +man3dir = $(mandir)/man3 +man7dir = $(mandir)/man7 + +docdir ?= $(prefix)/share/doc/libperf +htmldir = $(docdir)/html +exdir = $(docdir)/examples + +ASCIIDOC = asciidoc +ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf +ASCIIDOC_HTML = xhtml11 +MANPAGE_XSL = manpage-normal.xsl +XMLTO_EXTRA = +XMLTO =xmlto + +INSTALL ?= install +RM ?= rm -f + +# For asciidoc ... +# -7.1.2, no extra settings are needed. +# 8.0-, set ASCIIDOC8. +# + +# For docbook-xsl ... +# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) +# 1.69.0, no extra settings are needed? +# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? +# 1.71.1, no extra settings are needed? +# 1.72.0, set DOCBOOK_XSL_172. +# 1.73.0-, set ASCIIDOC_NO_ROFF + +# If you had been using DOCBOOK_XSL_172 in an attempt to get rid +# of 'the ".ft C" problem' in your generated manpages, and you +# instead ended up with weird characters around callouts, try +# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). + +ifdef ASCIIDOC8 + ASCIIDOC_EXTRA += -a asciidoc7compatible +endif +ifdef DOCBOOK_XSL_172 + ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff + MANPAGE_XSL = manpage-1.72.xsl +else + ifdef ASCIIDOC_NO_ROFF + # docbook-xsl after 1.72 needs the regular XSL, but will not + # pass-thru raw roff codes from asciidoc.conf, so turn them off. + ASCIIDOC_EXTRA += -a libperf-asciidoc-no-roff + endif +endif +ifdef MAN_BOLD_LITERAL + XMLTO_EXTRA += -m manpage-bold-literal.xsl +endif +ifdef DOCBOOK_SUPPRESS_SP + XMLTO_EXTRA += -m manpage-suppress-sp.xsl +endif + +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +export DESTDIR DESTDIR_SQ + +# Please note that there is a minor bug in asciidoc. +# The version after 6.0.3 _will_ include the patch found here: +# http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2 +# +# Until that version is released you may have to apply the patch +# yourself - yes, all 6 characters of it! + +QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir +QUIET_SUBDIR1 = + +ifneq ($(findstring $(MAKEFLAGS),w),w) + PRINT_DIR = --no-print-directory +else # "make -w" + NO_SUBDIR = : +endif + +ifneq ($(findstring $(MAKEFLAGS),s),s) + ifneq ($(V),1) + QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; + QUIET_XMLTO = @echo ' XMLTO '$@; + endif +endif + +all: $(MAN_X) $(MAN_HTML) + +$(MAN_HTML) $(MAN_X): asciidoc.conf + +install-man: all + $(call QUIET_INSTALL, man) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \ + $(INSTALL) -m 644 $(MAN_3) $(DESTDIR)$(man3dir); \ + $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ + $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); + +install-html: + $(call QUIET_INSTALL, html) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ + $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ + +install-examples: + $(call QUIET_INSTALL, examples) \ + $(INSTALL) -d -m 755 $(DESTDIR)$(exdir); \ + $(INSTALL) -m 644 $(MAN_EX) $(DESTDIR)$(exdir); \ + +CLEAN_FILES = \ + $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ + $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ + $(MAN_X) + +clean: + $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES) + +$(MAN_3): $(OUTPUT)%.3: %.xml + $(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + +$(MAN_7): $(OUTPUT)%.7: %.xml + $(QUIET_XMLTO)$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< + +$(MAN_XML): $(OUTPUT)%.xml: %.txt + $(QUIET_ASCIIDOC)$(ASCIIDOC) -b docbook -d manpage \ + $(ASCIIDOC_EXTRA) -alibperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ + mv $@+ $@ + +$(MAN_HTML): $(OUTPUT)%.html: %.txt + $(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ + $(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \ + mv $@+ $@ diff --git a/tools/lib/perf/Documentation/asciidoc.conf b/tools/lib/perf/Documentation/asciidoc.conf new file mode 100644 index 000000000000..9d5a5a5ee091 --- /dev/null +++ b/tools/lib/perf/Documentation/asciidoc.conf @@ -0,0 +1,120 @@ +## linktep: macro +# +# Usage: linktep:command[manpage-section] +# +# Note, {0} is the manpage section, while {target} is the command. +# +# Show TEP link as: <command>(<section>); if section is defined, else just show +# the command. + +[macros] +(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]= + +[attributes] +asterisk=* +plus=+ +caret=^ +startsb=[ +endsb=] +tilde=~ + +ifdef::backend-docbook[] +[linktep-inlinemacro] +{0%{target}} +{0#<citerefentry>} +{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>} +{0#</citerefentry>} +endif::backend-docbook[] + +ifdef::backend-docbook[] +ifndef::tep-asciidoc-no-roff[] +# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. +# v1.72 breaks with this because it replaces dots not in roff requests. +[listingblock] +<example><title>{title}</title> +<literallayout> +ifdef::doctype-manpage[] + .ft C +endif::doctype-manpage[] +| +ifdef::doctype-manpage[] + .ft +endif::doctype-manpage[] +</literallayout> +{title#}</example> +endif::tep-asciidoc-no-roff[] + +ifdef::tep-asciidoc-no-roff[] +ifdef::doctype-manpage[] +# The following two small workarounds insert a simple paragraph after screen +[listingblock] +<example><title>{title}</title> +<literallayout> +| +</literallayout><simpara></simpara> +{title#}</example> + +[verseblock] +<formalpara{id? id="{id}"}><title>{title}</title><para> +{title%}<literallayout{id? id="{id}"}> +{title#}<literallayout> +| +</literallayout> +{title#}</para></formalpara> +{title%}<simpara></simpara> +endif::doctype-manpage[] +endif::tep-asciidoc-no-roff[] +endif::backend-docbook[] + +ifdef::doctype-manpage[] +ifdef::backend-docbook[] +[header] +template::[header-declarations] +<refentry> +<refmeta> +<refentrytitle>{mantitle}</refentrytitle> +<manvolnum>{manvolnum}</manvolnum> +<refmiscinfo class="source">libperf</refmiscinfo> +<refmiscinfo class="version">{libperf_version}</refmiscinfo> +<refmiscinfo class="manual">libperf Manual</refmiscinfo> +</refmeta> +<refnamediv> + <refname>{manname1}</refname> + <refname>{manname2}</refname> + <refname>{manname3}</refname> + <refname>{manname4}</refname> + <refname>{manname5}</refname> + <refname>{manname6}</refname> + <refname>{manname7}</refname> + <refname>{manname8}</refname> + <refname>{manname9}</refname> + <refname>{manname10}</refname> + <refname>{manname11}</refname> + <refname>{manname12}</refname> + <refname>{manname13}</refname> + <refname>{manname14}</refname> + <refname>{manname15}</refname> + <refname>{manname16}</refname> + <refname>{manname17}</refname> + <refname>{manname18}</refname> + <refname>{manname19}</refname> + <refname>{manname20}</refname> + <refname>{manname21}</refname> + <refname>{manname22}</refname> + <refname>{manname23}</refname> + <refname>{manname24}</refname> + <refname>{manname25}</refname> + <refname>{manname26}</refname> + <refname>{manname27}</refname> + <refname>{manname28}</refname> + <refname>{manname29}</refname> + <refname>{manname30}</refname> + <refpurpose>{manpurpose}</refpurpose> +</refnamediv> +endif::backend-docbook[] +endif::doctype-manpage[] + +ifdef::backend-xhtml11[] +[linktep-inlinemacro] +<a href="{target}.html">{target}{0?({0})}</a> +endif::backend-xhtml11[] diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c new file mode 100644 index 000000000000..8e1a926a9cfe --- /dev/null +++ b/tools/lib/perf/Documentation/examples/sampling.c @@ -0,0 +1,119 @@ +#include <linux/perf_event.h> +#include <perf/evlist.h> +#include <perf/evsel.h> +#include <perf/cpumap.h> +#include <perf/threadmap.h> +#include <perf/mmap.h> +#include <perf/core.h> +#include <perf/event.h> +#include <stdio.h> +#include <unistd.h> + +static int libperf_print(enum libperf_print_level level, + const char *fmt, va_list ap) +{ + return vfprintf(stderr, fmt, ap); +} + +union u64_swap { + __u64 val64; + __u32 val32[2]; +}; + +int main(int argc, char **argv) +{ + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_mmap *map; + struct perf_cpu_map *cpus; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .disabled = 1, + .freq = 1, + .sample_freq = 10, + .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD, + }; + int err = -1; + union perf_event *event; + + libperf_init(libperf_print); + + cpus = perf_cpu_map__new(NULL); + if (!cpus) { + fprintf(stderr, "failed to create cpus\n"); + return -1; + } + + evlist = perf_evlist__new(); + if (!evlist) { + fprintf(stderr, "failed to create evlist\n"); + goto out_cpus; + } + + evsel = perf_evsel__new(&attr); + if (!evsel) { + fprintf(stderr, "failed to create cycles\n"); + goto out_cpus; + } + + perf_evlist__add(evlist, evsel); + + perf_evlist__set_maps(evlist, cpus, NULL); + + err = perf_evlist__open(evlist); + if (err) { + fprintf(stderr, "failed to open evlist\n"); + goto out_evlist; + } + + err = perf_evlist__mmap(evlist, 4); + if (err) { + fprintf(stderr, "failed to mmap evlist\n"); + goto out_evlist; + } + + perf_evlist__enable(evlist); + sleep(3); + perf_evlist__disable(evlist); + + perf_evlist__for_each_mmap(evlist, map, false) { + if (perf_mmap__read_init(map) < 0) + continue; + + while ((event = perf_mmap__read_event(map)) != NULL) { + int cpu, pid, tid; + __u64 ip, period, *array; + union u64_swap u; + + array = event->sample.array; + + ip = *array; + array++; + + u.val64 = *array; + pid = u.val32[0]; + tid = u.val32[1]; + array++; + + u.val64 = *array; + cpu = u.val32[0]; + array++; + + period = *array; + + fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n", + cpu, pid, tid, ip, period); + + perf_mmap__consume(map); + } + + perf_mmap__read_done(map); + } + +out_evlist: + perf_evlist__delete(evlist); +out_cpus: + perf_cpu_map__put(cpus); + return err; +} diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt new file mode 100644 index 000000000000..cae9757f49c1 --- /dev/null +++ b/tools/lib/perf/Documentation/libperf-counting.txt @@ -0,0 +1,211 @@ +libperf-counting(7) +=================== + +NAME +---- +libperf-counting - counting interface + +DESCRIPTION +----------- +The counting interface provides API to meassure and get count for specific perf events. + +The following test tries to explain count on `counting.c` example. + +It is by no means complete guide to counting, but shows libperf basic API for counting. + +The `counting.c` comes with libbperf package and can be compiled and run like: + +[source,bash] +-- +$ gcc -o counting counting.c -lperf +$ sudo ./counting +count 176792, enabled 176944, run 176944 +count 176242, enabled 176242, run 176242 +-- + +It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event, +which is available only for root. + +The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it: + +* creates events +* adds them to the event list +* opens and enables events through the event list +* does some workload +* disables events +* reads and displays event counts +* destroys the event list + +The first thing you need to do before using libperf is to call init function: + +[source,c] +-- + 8 static int libperf_print(enum libperf_print_level level, + 9 const char *fmt, va_list ap) + 10 { + 11 return vfprintf(stderr, fmt, ap); + 12 } + + 14 int main(int argc, char **argv) + 15 { + ... + 35 libperf_init(libperf_print); +-- + +It will setup the library and sets function for debug output from library. + +The `libperf_print` callback will receive any message with its debug level, +defined as: + +[source,c] +-- +enum libperf_print_level { + LIBPERF_ERR, + LIBPERF_WARN, + LIBPERF_INFO, + LIBPERF_DEBUG, + LIBPERF_DEBUG2, + LIBPERF_DEBUG3, +}; +-- + +Once the setup is complete we start by defining specific events using the `struct perf_event_attr`. + +We create software events for cpu and task: + +[source,c] +-- + 20 struct perf_event_attr attr1 = { + 21 .type = PERF_TYPE_SOFTWARE, + 22 .config = PERF_COUNT_SW_CPU_CLOCK, + 23 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, + 24 .disabled = 1, + 25 }; + 26 struct perf_event_attr attr2 = { + 27 .type = PERF_TYPE_SOFTWARE, + 28 .config = PERF_COUNT_SW_TASK_CLOCK, + 29 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING, + 30 .disabled = 1, + 31 }; +-- + +The `read_format` setup tells perf to include timing details together with each count. + +Next step is to prepare threads map. + +In this case we will monitor current process, so we create threads map with single pid (0): + +[source,c] +-- + 37 threads = perf_thread_map__new_dummy(); + 38 if (!threads) { + 39 fprintf(stderr, "failed to create threads\n"); + 40 return -1; + 41 } + 42 + 43 perf_thread_map__set_pid(threads, 0, 0); +-- + +Now we create libperf's event list, which will serve as holder for the events we want: + +[source,c] +-- + 45 evlist = perf_evlist__new(); + 46 if (!evlist) { + 47 fprintf(stderr, "failed to create evlist\n"); + 48 goto out_threads; + 49 } +-- + +We create libperf's events for the attributes we defined earlier and add them to the list: + +[source,c] +-- + 51 evsel = perf_evsel__new(&attr1); + 52 if (!evsel) { + 53 fprintf(stderr, "failed to create evsel1\n"); + 54 goto out_evlist; + 55 } + 56 + 57 perf_evlist__add(evlist, evsel); + 58 + 59 evsel = perf_evsel__new(&attr2); + 60 if (!evsel) { + 61 fprintf(stderr, "failed to create evsel2\n"); + 62 goto out_evlist; + 63 } + 64 + 65 perf_evlist__add(evlist, evsel); +-- + +Configure event list with the thread map and open events: + +[source,c] +-- + 67 perf_evlist__set_maps(evlist, NULL, threads); + 68 + 69 err = perf_evlist__open(evlist); + 70 if (err) { + 71 fprintf(stderr, "failed to open evsel\n"); + 72 goto out_evlist; + 73 } +-- + +Both events are created as disabled (note the `disabled = 1` assignment above), +so we need to enable the whole list explicitely (both events). + +From this moment events are counting and we can do our workload. + +When we are done we disable the events list. + +[source,c] +-- + 75 perf_evlist__enable(evlist); + 76 + 77 while (count--); + 78 + 79 perf_evlist__disable(evlist); +-- + +Now we need to get the counts from events, following code iterates throught the events list and read counts: + +[source,c] +-- + 81 perf_evlist__for_each_evsel(evlist, evsel) { + 82 perf_evsel__read(evsel, 0, 0, &counts); + 83 fprintf(stdout, "count %llu, enabled %llu, run %llu\n", + 84 counts.val, counts.ena, counts.run); + 85 } +-- + +And finaly cleanup. + +We close the whole events list (both events) and remove it together with the threads map: + +[source,c] +-- + 87 perf_evlist__close(evlist); + 88 + 89 out_evlist: + 90 perf_evlist__delete(evlist); + 91 out_threads: + 92 perf_thread_map__put(threads); + 93 return err; + 94 } +-- + +REPORTING BUGS +-------------- +Report bugs to <linux-perf-users@vger.kernel.org>. + +LICENSE +------- +libperf is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + +SEE ALSO +-------- +libperf(3), libperf-sampling(7) diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt new file mode 100644 index 000000000000..d71a7b4fcf5f --- /dev/null +++ b/tools/lib/perf/Documentation/libperf-sampling.txt @@ -0,0 +1,243 @@ +libperf-sampling(7) +=================== + +NAME +---- +libperf-sampling - sampling interface + + +DESCRIPTION +----------- +The sampling interface provides API to meassure and get count for specific perf events. + +The following test tries to explain count on `sampling.c` example. + +It is by no means complete guide to sampling, but shows libperf basic API for sampling. + +The `sampling.c` comes with libbperf package and can be compiled and run like: + +[source,bash] +-- +$ gcc -o sampling sampling.c -lperf +$ sudo ./sampling +cpu 0, pid 0, tid 0, ip ffffffffad06c4e6, period 1 +cpu 0, pid 4465, tid 4469, ip ffffffffad118748, period 18322959 +cpu 0, pid 0, tid 0, ip ffffffffad115722, period 33544846 +cpu 0, pid 4465, tid 4470, ip 7f84fe0cdad6, period 23687474 +cpu 0, pid 0, tid 0, ip ffffffffad9e0349, period 34255790 +cpu 0, pid 4465, tid 4469, ip ffffffffad136581, period 38664069 +cpu 0, pid 0, tid 0, ip ffffffffad9e55e2, period 21922384 +cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 17655175 +... +-- + +It requires root access, because it uses hardware cycles event. + +The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it: + +- creates events +- adds them to the event list +- opens and enables events through the event list +- sleeps for 3 seconds +- disables events +- reads and displays recorded samples +- destroys the event list + +The first thing you need to do before using libperf is to call init function: + +[source,c] +-- + 12 static int libperf_print(enum libperf_print_level level, + 13 const char *fmt, va_list ap) + 14 { + 15 return vfprintf(stderr, fmt, ap); + 16 } + + 23 int main(int argc, char **argv) + 24 { + ... + 40 libperf_init(libperf_print); +-- + +It will setup the library and sets function for debug output from library. + +The `libperf_print` callback will receive any message with its debug level, +defined as: + +[source,c] +-- +enum libperf_print_level { + LIBPERF_ERR, + LIBPERF_WARN, + LIBPERF_INFO, + LIBPERF_DEBUG, + LIBPERF_DEBUG2, + LIBPERF_DEBUG3, +}; +-- + +Once the setup is complete we start by defining cycles event using the `struct perf_event_attr`: + +[source,c] +-- + 29 struct perf_event_attr attr = { + 30 .type = PERF_TYPE_HARDWARE, + 31 .config = PERF_COUNT_HW_CPU_CYCLES, + 32 .disabled = 1, + 33 .freq = 1, + 34 .sample_freq = 10, + 35 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD, + 36 }; +-- + +Next step is to prepare cpus map. + +In this case we will monitor all the available CPUs: + +[source,c] +-- + 42 cpus = perf_cpu_map__new(NULL); + 43 if (!cpus) { + 44 fprintf(stderr, "failed to create cpus\n"); + 45 return -1; + 46 } +-- + +Now we create libperf's event list, which will serve as holder for the cycles event: + +[source,c] +-- + 48 evlist = perf_evlist__new(); + 49 if (!evlist) { + 50 fprintf(stderr, "failed to create evlist\n"); + 51 goto out_cpus; + 52 } +-- + +We create libperf's event for the cycles attribute we defined earlier and add it to the list: + +[source,c] +-- + 54 evsel = perf_evsel__new(&attr); + 55 if (!evsel) { + 56 fprintf(stderr, "failed to create cycles\n"); + 57 goto out_cpus; + 58 } + 59 + 60 perf_evlist__add(evlist, evsel); +-- + +Configure event list with the cpus map and open event: + +[source,c] +-- + 62 perf_evlist__set_maps(evlist, cpus, NULL); + 63 + 64 err = perf_evlist__open(evlist); + 65 if (err) { + 66 fprintf(stderr, "failed to open evlist\n"); + 67 goto out_evlist; + 68 } +-- + +Once the events list is open, we can create memory maps AKA perf ring buffers: + +[source,c] +-- + 70 err = perf_evlist__mmap(evlist, 4); + 71 if (err) { + 72 fprintf(stderr, "failed to mmap evlist\n"); + 73 goto out_evlist; + 74 } +-- + +The event is created as disabled (note the `disabled = 1` assignment above), +so we need to enable the events list explicitely. + +From this moment the cycles event is sampling. + +We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list. + +[source,c] +-- + 76 perf_evlist__enable(evlist); + 77 sleep(3); + 78 perf_evlist__disable(evlist); +-- + +Following code walks through the ring buffers and reads stored events/samples: + +[source,c] +-- + 80 perf_evlist__for_each_mmap(evlist, map, false) { + 81 if (perf_mmap__read_init(map) < 0) + 82 continue; + 83 + 84 while ((event = perf_mmap__read_event(map)) != NULL) { + + /* process event */ + +108 perf_mmap__consume(map); +109 } +110 perf_mmap__read_done(map); +111 } + +-- + +Each sample needs to get parsed: + +[source,c] +-- + 85 int cpu, pid, tid; + 86 __u64 ip, period, *array; + 87 union u64_swap u; + 88 + 89 array = event->sample.array; + 90 + 91 ip = *array; + 92 array++; + 93 + 94 u.val64 = *array; + 95 pid = u.val32[0]; + 96 tid = u.val32[1]; + 97 array++; + 98 + 99 u.val64 = *array; +100 cpu = u.val32[0]; +101 array++; +102 +103 period = *array; +104 +105 fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n", +106 cpu, pid, tid, ip, period); +-- + +And finaly cleanup. + +We close the whole events list (both events) and remove it together with the threads map: + +[source,c] +-- +113 out_evlist: +114 perf_evlist__delete(evlist); +115 out_cpus: +116 perf_cpu_map__put(cpus); +117 return err; +118 } +-- + +REPORTING BUGS +-------------- +Report bugs to <linux-perf-users@vger.kernel.org>. + +LICENSE +------- +libperf is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + +SEE ALSO +-------- +libperf(3), libperf-counting(7) diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt new file mode 100644 index 000000000000..5a6bb512789d --- /dev/null +++ b/tools/lib/perf/Documentation/libperf.txt @@ -0,0 +1,246 @@ +libperf(3) +========== + +NAME +---- +libperf - Linux kernel perf event library + + +SYNOPSIS +-------- +*Generic API:* + +[source,c] +-- + #include <perf/core.h> + + enum libperf_print_level { + LIBPERF_ERR, + LIBPERF_WARN, + LIBPERF_INFO, + LIBPERF_DEBUG, + LIBPERF_DEBUG2, + LIBPERF_DEBUG3, + }; + + typedef int (*libperf_print_fn_t)(enum libperf_print_level level, + const char *, va_list ap); + + void libperf_init(libperf_print_fn_t fn); +-- + +*API to handle cpu maps:* + +[source,c] +-- + #include <perf/cpumap.h> + + struct perf_cpu_map; + + struct perf_cpu_map *perf_cpu_map__dummy_new(void); + struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); + struct perf_cpu_map *perf_cpu_map__read(FILE *file); + struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); + struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, + struct perf_cpu_map *other); + void perf_cpu_map__put(struct perf_cpu_map *map); + int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); + int perf_cpu_map__nr(const struct perf_cpu_map *cpus); + bool perf_cpu_map__empty(const struct perf_cpu_map *map); + int perf_cpu_map__max(struct perf_cpu_map *map); + + #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) +-- + +*API to handle thread maps:* + +[source,c] +-- + #include <perf/threadmap.h> + + struct perf_thread_map; + + struct perf_thread_map *perf_thread_map__new_dummy(void); + + void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); + char *perf_thread_map__comm(struct perf_thread_map *map, int thread); + int perf_thread_map__nr(struct perf_thread_map *threads); + pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); + + struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); + void perf_thread_map__put(struct perf_thread_map *map); +-- + +*API to handle event lists:* + +[source,c] +-- + #include <perf/evlist.h> + + struct perf_evlist; + + void perf_evlist__add(struct perf_evlist *evlist, + struct perf_evsel *evsel); + void perf_evlist__remove(struct perf_evlist *evlist, + struct perf_evsel *evsel); + struct perf_evlist *perf_evlist__new(void); + void perf_evlist__delete(struct perf_evlist *evlist); + struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist, + struct perf_evsel *evsel); + int perf_evlist__open(struct perf_evlist *evlist); + void perf_evlist__close(struct perf_evlist *evlist); + void perf_evlist__enable(struct perf_evlist *evlist); + void perf_evlist__disable(struct perf_evlist *evlist); + + #define perf_evlist__for_each_evsel(evlist, pos) + + void perf_evlist__set_maps(struct perf_evlist *evlist, + struct perf_cpu_map *cpus, + struct perf_thread_map *threads); + int perf_evlist__poll(struct perf_evlist *evlist, int timeout); + int perf_evlist__filter_pollfd(struct perf_evlist *evlist, + short revents_and_mask); + + int perf_evlist__mmap(struct perf_evlist *evlist, int pages); + void perf_evlist__munmap(struct perf_evlist *evlist); + + struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist, + struct perf_mmap *map, + bool overwrite); + + #define perf_evlist__for_each_mmap(evlist, pos, overwrite) +-- + +*API to handle events:* + +[source,c] +-- + #include <perf/evsel.h>* + + struct perf_evsel; + + struct perf_counts_values { + union { + struct { + uint64_t val; + uint64_t ena; + uint64_t run; + }; + uint64_t values[3]; + }; + }; + + struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr); + void perf_evsel__delete(struct perf_evsel *evsel); + int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, + struct perf_thread_map *threads); + void perf_evsel__close(struct perf_evsel *evsel); + void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); + int perf_evsel__enable(struct perf_evsel *evsel); + int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__disable(struct perf_evsel *evsel); + int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); + struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); + struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); + struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); +-- + +*API to handle maps (perf ring buffers):* + +[source,c] +-- + #include <perf/mmap.h> + + struct perf_mmap; + + void perf_mmap__consume(struct perf_mmap *map); + int perf_mmap__read_init(struct perf_mmap *map); + void perf_mmap__read_done(struct perf_mmap *map); + union perf_event *perf_mmap__read_event(struct perf_mmap *map); +-- + +*Structures to access perf API events:* + +[source,c] +-- + #include <perf/event.h> + + struct perf_record_mmap; + struct perf_record_mmap2; + struct perf_record_comm; + struct perf_record_namespaces; + struct perf_record_fork; + struct perf_record_lost; + struct perf_record_lost_samples; + struct perf_record_read; + struct perf_record_throttle; + struct perf_record_ksymbol; + struct perf_record_bpf_event; + struct perf_record_sample; + struct perf_record_switch; + struct perf_record_header_attr; + struct perf_record_record_cpu_map; + struct perf_record_cpu_map_data; + struct perf_record_cpu_map; + struct perf_record_event_update_cpus; + struct perf_record_event_update_scale; + struct perf_record_event_update; + struct perf_trace_event_type; + struct perf_record_header_event_type; + struct perf_record_header_tracing_data; + struct perf_record_header_build_id; + struct perf_record_id_index; + struct perf_record_auxtrace_info; + struct perf_record_auxtrace; + struct perf_record_auxtrace_error; + struct perf_record_aux; + struct perf_record_itrace_start; + struct perf_record_thread_map_entry; + struct perf_record_thread_map; + struct perf_record_stat_config_entry; + struct perf_record_stat_config; + struct perf_record_stat; + struct perf_record_stat_round; + struct perf_record_time_conv; + struct perf_record_header_feature; + struct perf_record_compressed; +-- + +DESCRIPTION +----------- +The libperf library provides an API to access the linux kernel perf +events subsystem. + +Following objects are key to the libperf interface: + +[horizontal] + +struct perf_cpu_map:: Provides a cpu list abstraction. + +struct perf_thread_map:: Provides a thread list abstraction. + +struct perf_evsel:: Provides an abstraction for single a perf event. + +struct perf_evlist:: Gathers several struct perf_evsel object and performs functions on all of them. + +struct perf_mmap:: Provides an abstraction for accessing perf ring buffer. + +The exported API functions bind these objects together. + +REPORTING BUGS +-------------- +Report bugs to <linux-perf-users@vger.kernel.org>. + +LICENSE +------- +libperf is Free Software licensed under the GNU LGPL 2.1 + +RESOURCES +--------- +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + +SEE ALSO +-------- +libperf-sampling(7), libperf-counting(7) diff --git a/tools/lib/perf/Documentation/manpage-1.72.xsl b/tools/lib/perf/Documentation/manpage-1.72.xsl new file mode 100644 index 000000000000..b4d315cb8c47 --- /dev/null +++ b/tools/lib/perf/Documentation/manpage-1.72.xsl @@ -0,0 +1,14 @@ +<!-- manpage-1.72.xsl: + special settings for manpages rendered from asciidoc+docbook + handles peculiarities in docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the special values for the roff control characters + needed for docbook-xsl 1.72.0 --> +<xsl:param name="git.docbook.backslash">▓</xsl:param> +<xsl:param name="git.docbook.dot" >⌂</xsl:param> + +</xsl:stylesheet> diff --git a/tools/lib/perf/Documentation/manpage-base.xsl b/tools/lib/perf/Documentation/manpage-base.xsl new file mode 100644 index 000000000000..a264fa616093 --- /dev/null +++ b/tools/lib/perf/Documentation/manpage-base.xsl @@ -0,0 +1,35 @@ +<!-- manpage-base.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- these params silence some output from xmlto --> +<xsl:param name="man.output.quietly" select="1"/> +<xsl:param name="refentry.meta.get.quietly" select="1"/> + +<!-- convert asciidoc callouts to man page format; + git.docbook.backslash and git.docbook.dot params + must be supplied by another XSL file or other means --> +<xsl:template match="co"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB(', + substring-after(@id,'-'),')', + $git.docbook.backslash,'fR')"/> +</xsl:template> +<xsl:template match="calloutlist"> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>sp </xsl:text> + <xsl:apply-templates/> + <xsl:text> </xsl:text> +</xsl:template> +<xsl:template match="callout"> + <xsl:value-of select="concat( + $git.docbook.backslash,'fB', + substring-after(@arearefs,'-'), + '. ',$git.docbook.backslash,'fR')"/> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.dot"/> + <xsl:text>br </xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/tools/lib/perf/Documentation/manpage-bold-literal.xsl b/tools/lib/perf/Documentation/manpage-bold-literal.xsl new file mode 100644 index 000000000000..608eb5df6281 --- /dev/null +++ b/tools/lib/perf/Documentation/manpage-bold-literal.xsl @@ -0,0 +1,17 @@ +<!-- manpage-bold-literal.xsl: + special formatting for manpages rendered from asciidoc+docbook --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- render literal text as bold (instead of plain or monospace); + this makes literal text easier to distinguish in manpages + viewed on a tty --> +<xsl:template match="literal"> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fB</xsl:text> + <xsl:apply-templates/> + <xsl:value-of select="$git.docbook.backslash"/> + <xsl:text>fR</xsl:text> +</xsl:template> + +</xsl:stylesheet> diff --git a/tools/lib/perf/Documentation/manpage-normal.xsl b/tools/lib/perf/Documentation/manpage-normal.xsl new file mode 100644 index 000000000000..a48f5b11f3dc --- /dev/null +++ b/tools/lib/perf/Documentation/manpage-normal.xsl @@ -0,0 +1,13 @@ +<!-- manpage-normal.xsl: + special settings for manpages rendered from asciidoc+docbook + handles anything we want to keep away from docbook-xsl 1.72.0 --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<xsl:import href="manpage-base.xsl"/> + +<!-- these are the normal values for the roff control characters --> +<xsl:param name="git.docbook.backslash">\</xsl:param> +<xsl:param name="git.docbook.dot" >.</xsl:param> + +</xsl:stylesheet> diff --git a/tools/lib/perf/Documentation/manpage-suppress-sp.xsl b/tools/lib/perf/Documentation/manpage-suppress-sp.xsl new file mode 100644 index 000000000000..a63c7632a87d --- /dev/null +++ b/tools/lib/perf/Documentation/manpage-suppress-sp.xsl @@ -0,0 +1,21 @@ +<!-- manpage-suppress-sp.xsl: + special settings for manpages rendered from asciidoc+docbook + handles erroneous, inline .sp in manpage output of some + versions of docbook-xsl --> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + version="1.0"> + +<!-- attempt to work around spurious .sp at the tail of the line + that some versions of docbook stylesheets seem to add --> +<xsl:template match="simpara"> + <xsl:variable name="content"> + <xsl:apply-templates/> + </xsl:variable> + <xsl:value-of select="normalize-space($content)"/> + <xsl:if test="not(ancestor::authorblurb) and + not(ancestor::personblurb)"> + <xsl:text> </xsl:text> + </xsl:if> +</xsl:template> + +</xsl:stylesheet> diff --git a/tools/perf/lib/Makefile b/tools/lib/perf/Makefile index 0f233638ef1f..3718d65cffac 100644 --- a/tools/perf/lib/Makefile +++ b/tools/lib/perf/Makefile @@ -60,7 +60,7 @@ else endif INCLUDES = \ --I$(srctree)/tools/perf/lib/include \ +-I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(SRCARCH)/include/ \ @@ -181,7 +181,10 @@ install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ $(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644) -install: install_lib install_headers install_pkgconfig +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +install: install_lib install_headers install_pkgconfig install_doc FORCE: diff --git a/tools/perf/lib/core.c b/tools/lib/perf/core.c index 58fc894b76c5..58fc894b76c5 100644 --- a/tools/perf/lib/core.c +++ b/tools/lib/perf/core.c diff --git a/tools/perf/lib/cpumap.c b/tools/lib/perf/cpumap.c index f93f4e703e4c..f93f4e703e4c 100644 --- a/tools/perf/lib/cpumap.c +++ b/tools/lib/perf/cpumap.c diff --git a/tools/perf/lib/evlist.c b/tools/lib/perf/evlist.c index ae9e65aa2491..5b9f2ca50591 100644 --- a/tools/perf/lib/evlist.c +++ b/tools/lib/perf/evlist.c @@ -164,6 +164,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, evlist->threads = perf_thread_map__get(threads); } + if (!evlist->all_cpus && cpus) + evlist->all_cpus = perf_cpu_map__get(cpus); + perf_evlist__propagate_maps(evlist); } diff --git a/tools/perf/lib/evsel.c b/tools/lib/perf/evsel.c index 4dc06289f4c7..4dc06289f4c7 100644 --- a/tools/perf/lib/evsel.c +++ b/tools/lib/perf/evsel.c diff --git a/tools/perf/lib/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b..840d4032587b 100644 --- a/tools/perf/lib/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 74dc8c3f0b66..74dc8c3f0b66 100644 --- a/tools/perf/lib/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1ffd083b235e..1ffd083b235e 100644 --- a/tools/perf/lib/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h diff --git a/tools/perf/lib/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h index 5175d491b2d4..5175d491b2d4 100644 --- a/tools/perf/lib/include/internal/lib.h +++ b/tools/lib/perf/include/internal/lib.h diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index be7556e0a2b2..be7556e0a2b2 100644 --- a/tools/perf/lib/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h diff --git a/tools/perf/lib/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h index 2093e8868a67..2093e8868a67 100644 --- a/tools/perf/lib/include/internal/tests.h +++ b/tools/lib/perf/include/internal/tests.h diff --git a/tools/perf/lib/include/internal/threadmap.h b/tools/lib/perf/include/internal/threadmap.h index df748baf9eda..df748baf9eda 100644 --- a/tools/perf/lib/include/internal/threadmap.h +++ b/tools/lib/perf/include/internal/threadmap.h diff --git a/tools/perf/lib/include/internal/xyarray.h b/tools/lib/perf/include/internal/xyarray.h index 51e35d6c8ec4..51e35d6c8ec4 100644 --- a/tools/perf/lib/include/internal/xyarray.h +++ b/tools/lib/perf/include/internal/xyarray.h diff --git a/tools/perf/lib/include/perf/core.h b/tools/lib/perf/include/perf/core.h index a3f6d68edad7..a3f6d68edad7 100644 --- a/tools/perf/lib/include/perf/core.h +++ b/tools/lib/perf/include/perf/core.h diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 6a17ad730cbc..6a17ad730cbc 100644 --- a/tools/perf/lib/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h diff --git a/tools/perf/lib/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 18106899cb4e..18106899cb4e 100644 --- a/tools/perf/lib/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h index 0a7479dc13bf..0a7479dc13bf 100644 --- a/tools/perf/lib/include/perf/evlist.h +++ b/tools/lib/perf/include/perf/evlist.h diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index c82ec39a4ad0..c82ec39a4ad0 100644 --- a/tools/perf/lib/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/lib/perf/include/perf/mmap.h index 9508ad90d8b9..9508ad90d8b9 100644 --- a/tools/perf/lib/include/perf/mmap.h +++ b/tools/lib/perf/include/perf/mmap.h diff --git a/tools/perf/lib/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h index a7c50de8d010..a7c50de8d010 100644 --- a/tools/perf/lib/include/perf/threadmap.h +++ b/tools/lib/perf/include/perf/threadmap.h diff --git a/tools/perf/lib/internal.h b/tools/lib/perf/internal.h index 2c27e158de6b..2c27e158de6b 100644 --- a/tools/perf/lib/internal.h +++ b/tools/lib/perf/internal.h diff --git a/tools/perf/lib/lib.c b/tools/lib/perf/lib.c index 18658931fc71..18658931fc71 100644 --- a/tools/perf/lib/lib.c +++ b/tools/lib/perf/lib.c diff --git a/tools/perf/lib/libperf.map b/tools/lib/perf/libperf.map index 7be1af8a546c..7be1af8a546c 100644 --- a/tools/perf/lib/libperf.map +++ b/tools/lib/perf/libperf.map diff --git a/tools/perf/lib/libperf.pc.template b/tools/lib/perf/libperf.pc.template index 117e4a237b55..117e4a237b55 100644 --- a/tools/perf/lib/libperf.pc.template +++ b/tools/lib/perf/libperf.pc.template diff --git a/tools/perf/lib/mmap.c b/tools/lib/perf/mmap.c index 79d5ed6c38cc..79d5ed6c38cc 100644 --- a/tools/perf/lib/mmap.c +++ b/tools/lib/perf/mmap.c diff --git a/tools/perf/lib/tests/Makefile b/tools/lib/perf/tests/Makefile index a43cd08c5c03..96841775feaf 100644 --- a/tools/perf/lib/tests/Makefile +++ b/tools/lib/perf/tests/Makefile @@ -16,7 +16,7 @@ all: include $(srctree)/tools/scripts/Makefile.include -INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/lib +INCLUDE = -I$(srctree)/tools/lib/perf/include -I$(srctree)/tools/include -I$(srctree)/tools/lib $(TESTS_A): FORCE $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI) diff --git a/tools/perf/lib/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c2..c8d45091e7c2 100644 --- a/tools/perf/lib/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c diff --git a/tools/perf/lib/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c2504..6d8ebe0c2504 100644 --- a/tools/perf/lib/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c diff --git a/tools/perf/lib/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965b..135722ac965b 100644 --- a/tools/perf/lib/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c diff --git a/tools/perf/lib/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbedde..7dc4d6fbedde 100644 --- a/tools/perf/lib/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c diff --git a/tools/perf/lib/threadmap.c b/tools/lib/perf/threadmap.c index e92c368b0a6c..e92c368b0a6c 100644 --- a/tools/perf/lib/threadmap.c +++ b/tools/lib/perf/threadmap.c diff --git a/tools/perf/lib/xyarray.c b/tools/lib/perf/xyarray.c index dcd901d154bb..dcd901d154bb 100644 --- a/tools/perf/lib/xyarray.c +++ b/tools/lib/perf/xyarray.c diff --git a/tools/lib/string.c b/tools/lib/string.c index f2ae1b87c719..f645343815de 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -96,6 +96,10 @@ int strtobool(const char *s, bool *res) * If libc has strlcpy() then that version will override this * implementation: */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wignored-attributes" +#endif size_t __weak strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -107,6 +111,9 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size) } return ret; } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif /** * skip_spaces - Removes leading whitespace from @str. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index d2a19b0bc05a..ee08aeff30a1 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -2,10 +2,6 @@ include ../scripts/Makefile.include include ../scripts/Makefile.arch -ifeq ($(ARCH),x86_64) -ARCH := x86 -endif - # always use the host compiler HOSTAR ?= ar HOSTCC ?= gcc @@ -33,7 +29,7 @@ all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ - -I$(srctree)/tools/arch/$(ARCH)/include + -I$(srctree)/tools/arch/$(SRCARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 9bd04bbed01e..2a1261bfbb62 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -48,5 +48,3 @@ check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[ check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' - -cd - diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index e8c972f89357..1b5042f134a8 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -112,6 +112,12 @@ OPTIONS --objdump=<path>:: Path to objdump binary. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --skip-missing:: Skip symbols that cannot be annotated. diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c4dd23c4b478..8ead55593984 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -239,7 +239,6 @@ buildid.*:: set buildid.dir to /dev/null. The default is $HOME/.debug annotate.*:: - These options work only for TUI. These are in control of addresses, jump function, source code in lines of assembly code from a specific program. @@ -269,6 +268,8 @@ annotate.*:: │ mov (%rdi),%rdx │ return n; + This option works with tui, stdio2 browsers. + annotate.use_offset:: Basing on a first address of a loaded function, offset can be used. Instead of using original addresses of assembly code, @@ -287,6 +288,8 @@ annotate.*:: 368:│ mov 0x8(%r14),%rdi + This option works with tui, stdio2 browsers. + annotate.jump_arrows:: There can be jump instruction among assembly code. Depending on a boolean value of jump_arrows, @@ -306,6 +309,8 @@ annotate.*:: │1330: mov %r15,%r10 │1333: cmp %r15,%r14 + This option works with tui browser. + annotate.show_linenr:: When showing source code if this option is 'true', line numbers are printed as below. @@ -325,6 +330,8 @@ annotate.*:: │ array++; │ } + This option works with tui, stdio2 browsers. + annotate.show_nr_jumps:: Let's see a part of assembly code. @@ -335,6 +342,8 @@ annotate.*:: │1 1382: movb $0x1,-0x270(%rbp) + This option works with tui, stdio2 browsers. + annotate.show_total_period:: To compare two records on an instruction base, with this option provided, display total number of samples that belong to a line @@ -348,11 +357,30 @@ annotate.*:: 99.93 │ mov %eax,%eax + This option works with tui, stdio2, stdio browsers. + + annotate.show_nr_samples:: + By default perf annotate shows percentage of samples. This option + can be used to print absolute number of samples. Ex, when set as + false: + + Percent│ + 74.03 │ mov %fs:0x28,%rax + + When set as true: + + Samples│ + 6 │ mov %fs:0x28,%rax + + This option works with tui, stdio2, stdio browsers. + annotate.offset_level:: Default is '1', meaning just jump targets will have offsets show right beside the instruction. When set to '2' 'call' instructions will also have its offsets shown, 3 or higher will show offsets for all instructions. + This option works with tui, stdio2 browsers. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - @@ -490,6 +518,12 @@ top.*:: column by default. The default is 'true'. + top.call-graph:: + This is identical to 'call-graph.record-mode', except it is + applicable only for 'top' subcommand. This option ONLY setup + the unwind method. To enable 'perf top' to actually use it, + the command line option -g must be specified. + man.*:: man.viewer:: This option can assign a tool to view manual pages when 'help' @@ -517,6 +551,16 @@ record.*:: But if this option is 'no-cache', it will not update the build-id cache. 'skip' skips post-processing and does not update the cache. + record.call-graph:: + This is identical to 'call-graph.record-mode', except it is + applicable only for 'record' subcommand. This option ONLY setup + the unwind method. To enable 'perf record' to actually use it, + the command line option -g must be specified. + + record.aio:: + Use 'n' control blocks in asynchronous (Posix AIO) trace writing + mode ('n' default: 1, max: 4). + diff.*:: diff.order:: This option sets the number of columns to sort the result. @@ -566,6 +610,11 @@ trace.*:: "libbeauty", the default, to use the same argument beautifiers used in the strace-like sys_enter+sys_exit lines. +ftrace.*:: + ftrace.tracer:: + Can be used to select the default tracer. Possible values are + 'function' and 'function_graph'. + llvm.*:: llvm.clang-path:: Path to clang. If omit, search it from $PATH. @@ -610,6 +659,29 @@ scripts.*:: The script gets the same options passed as a full perf script, in particular -i perfdata file, --cpu, --tid +convert.*:: + + convert.queue-size:: + Limit the size of ordered_events queue, so we could control + allocation size of perf data files without proper finished + round events. + +intel-pt.*:: + + intel-pt.cache-divisor:: + + intel-pt.mispred-all:: + If set, Intel PT decoder will set the mispred flag on all + branches. + +auxtrace.*:: + + auxtrace.dumpdir:: + s390 only. The directory to save the auxiliary trace buffer + can be changed using this option. Ex, auxtrace.dumpdir=/tmp. + If the directory does not exist or has the wrong file type, + the current directory is used. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8dbe2119686a..db61f16ffa56 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -367,6 +367,12 @@ OPTIONS --objdump=<path>:: Path to objdump binary. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --group:: Show event group information together. It forces group output also if there are no groups defined in data file. diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 63f938b887dd..5fbe42bd599b 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -110,6 +110,10 @@ OPTIONS for 'perf sched timehist' --max-stack:: Maximum number of functions to display in backtrace, default 5. +-C=:: +--cpu=:: + Only show events for the given CPU(s) (comma separated list). + -p=:: --pid=:: Only show events for given process ID (comma separated list). diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 5596129a71cf..324b6b53c86b 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -158,6 +158,12 @@ Default is to monitor all CPUS. -M:: --disassembler-style=:: Set disassembler style for objdump. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --source:: Interleave source code with assembly code. Enabled by default, disable with --no-source. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 4934edb5adfd..5d7b947320fb 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -7,6 +7,7 @@ tools/lib/traceevent tools/lib/api tools/lib/bpf tools/lib/subcmd +tools/lib/perf tools/lib/argv_split.c tools/lib/ctype.c tools/lib/hweight.c diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c90f4146e5a2..80e55e796be9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -286,7 +286,7 @@ ifeq ($(DEBUG),0) endif endif -INC_FLAGS += -I$(src-perf)/lib/include +INC_FLAGS += -I$(srctree)/tools/lib/perf/include INC_FLAGS += -I$(src-perf)/util/include INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index eae5d5e95952..3eda9d4b88e7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -230,7 +230,7 @@ LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ BPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ -LIBPERF_DIR = $(srctree)/tools/perf/lib/ +LIBPERF_DIR = $(srctree)/tools/lib/perf/ # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. # Without this setting the output feature dump file misses some features, for diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index ede040cf82ad..941f814820b8 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -226,7 +226,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) continue; - sink = term->val.drv_cfg; + sink = term->val.str; snprintf(path, PATH_MAX, "sinks/%s", sink); ret = perf_pmu__scan_file(pmu, path, "%x", &hash); @@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr) free(ptr); } -static int cs_etm_read_finish(struct auxtrace_record *itr, int idx) -{ - struct cs_etm_recording *ptr = - container_of(itr, struct cs_etm_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->cs_etm_pmu->type) - return perf_evlist__enable_event_idx(ptr->evlist, - evsel, idx); - } - - return -EINVAL; -} - struct auxtrace_record *cs_etm_record_init(int *err) { struct perf_pmu *cs_etm_pmu; @@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err) } ptr->cs_etm_pmu = cs_etm_pmu; + ptr->itr.pmu = cs_etm_pmu; ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options; ptr->itr.recording_options = cs_etm_recording_options; ptr->itr.info_priv_size = cs_etm_info_priv_size; @@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err) ptr->itr.snapshot_finish = cs_etm_snapshot_finish; ptr->itr.reference = cs_etm_reference; ptr->itr.free = cs_etm_recording_free; - ptr->itr.read_finish = cs_etm_read_finish; + ptr->itr.read_finish = auxtrace_record__read_finish; *err = 0; return &ptr->itr; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index eba6541ec0f1..27653be24447 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -11,17 +11,17 @@ #include <linux/zalloc.h> #include <time.h> -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/session.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/session.h" #include <internal/lib.h> // page_size -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/arm-spe.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/arm-spe.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr) free(sper); } -static int arm_spe_read_finish(struct auxtrace_record *itr, int idx) -{ - struct arm_spe_recording *sper = - container_of(itr, struct arm_spe_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(sper->evlist, evsel) { - if (evsel->core.attr.type == sper->arm_spe_pmu->type) - return perf_evlist__enable_event_idx(sper->evlist, - evsel, idx); - } - return -EINVAL; -} - struct auxtrace_record *arm_spe_recording_init(int *err, struct perf_pmu *arm_spe_pmu) { @@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err, } sper->arm_spe_pmu = arm_spe_pmu; + sper->itr.pmu = arm_spe_pmu; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; sper->itr.info_fill = arm_spe_info_fill; sper->itr.free = arm_spe_recording_free; sper->itr.reference = arm_spe_reference; - sper->itr.read_finish = arm_spe_read_finish; + sper->itr.read_finish = auxtrace_record__read_finish; sper->itr.alignment = 0; *err = 0; diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index a32e4b72a98f..d730666ab95d 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -1,8 +1,10 @@ #include <stdio.h> #include <stdlib.h> #include <perf/cpumap.h> +#include <util/cpumap.h> #include <internal/cpumap.h> #include <api/fs/fs.h> +#include <errno.h> #include "debug.h" #include "header.h" @@ -12,26 +14,21 @@ #define MIDR_VARIANT_SHIFT 20 #define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) -char *get_cpuid_str(struct perf_pmu *pmu) +static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) { - char *buf = NULL; - char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); - int cpu; u64 midr = 0; - struct perf_cpu_map *cpus; - FILE *file; + int cpu; - if (!sysfs || !pmu || !pmu->cpus) - return NULL; + if (!sysfs || sz < MIDR_SIZE) + return EINVAL; - buf = malloc(MIDR_SIZE); - if (!buf) - return NULL; + cpus = perf_cpu_map__get(cpus); - /* read midr from list of cpus mapped to this pmu */ - cpus = perf_cpu_map__get(pmu->cpus); for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { + char path[PATH_MAX]; + FILE *file; + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, sysfs, cpus->map[cpu]); @@ -57,12 +54,48 @@ char *get_cpuid_str(struct perf_pmu *pmu) break; } - if (!midr) { + perf_cpu_map__put(cpus); + + if (!midr) + return EINVAL; + + return 0; +} + +int get_cpuid(char *buf, size_t sz) +{ + struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + int ret; + + if (!cpus) + return EINVAL; + + ret = _get_cpuid(buf, sz, cpus); + + perf_cpu_map__put(cpus); + + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu) +{ + char *buf = NULL; + int res; + + if (!pmu || !pmu->cpus) + return NULL; + + buf = malloc(MIDR_SIZE); + if (!buf) + return NULL; + + /* read midr from list of cpus mapped to this pmu */ + res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus); + if (res) { pr_err("failed to get cpuid string for PMU %s\n", pmu->name); free(buf); buf = NULL; } - perf_cpu_map__put(cpus); return buf; } diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2864e2e3776d..2833e101a7c6 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/perf_regs.h" +#include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG_END diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 43f736ed47f2..35b61bfc1b1a 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -517,3 +517,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 nospu clone3 ppc_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index e9c436eeffc9..0a5242900248 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -4,8 +4,8 @@ #include <regex.h> #include <linux/zalloc.h> -#include "../../util/perf_regs.h" -#include "../../util/debug.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" #include <linux/kernel.h> diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index c29976eca4a8..44d510bc9b78 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -357,6 +357,8 @@ 433 common fspick __x64_sys_fspick 434 common pidfd_open __x64_sys_pidfd_open 435 common clone3 __x64_sys_clone3/ptregs +437 common openat2 __x64_sys_openat2 +438 common pidfd_getfd __x64_sys_pidfd_getfd # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 7abc9fd4cbec..3da506e13f49 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -7,13 +7,13 @@ #include <errno.h> #include <stdbool.h> -#include "../../util/header.h" -#include "../../util/debug.h" -#include "../../util/pmu.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/evlist.h" +#include "../../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/pmu.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/evlist.h" static struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index ac45015cc6ba..047dc00eafa6 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -3,12 +3,12 @@ #include <linux/string.h> #include <linux/zalloc.h> -#include "../../util/event.h" -#include "../../util/synthetic-events.h" -#include "../../util/machine.h" -#include "../../util/tool.h" -#include "../../util/map.h" -#include "../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index aa6deb463bf3..578c8c568ffd 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -7,8 +7,8 @@ #include <string.h> #include <regex.h> -#include "../../util/debug.h" -#include "../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/header.h" static inline void cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 27d9e214d068..09f93800bffd 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -11,18 +11,18 @@ #include <linux/log2.h> #include <linux/zalloc.h> -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/mmap.h" -#include "../../util/session.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/record.h" -#include "../../util/tsc.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-bts.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/record.h" +#include "../../../util/tsc.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-bts.h" #include <internal/lib.h> // page_size #define KiB(x) ((x) * 1024) @@ -413,20 +413,6 @@ out_err: return err; } -static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) -{ - struct intel_bts_recording *btsr = - container_of(itr, struct intel_bts_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(btsr->evlist, evsel) { - if (evsel->core.attr.type == btsr->intel_bts_pmu->type) - return perf_evlist__enable_event_idx(btsr->evlist, - evsel, idx); - } - return -EINVAL; -} - struct auxtrace_record *intel_bts_recording_init(int *err) { struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); @@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err) } btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.pmu = intel_bts_pmu; btsr->itr.recording_options = intel_bts_recording_options; btsr->itr.info_priv_size = intel_bts_info_priv_size; btsr->itr.info_fill = intel_bts_info_fill; @@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err) btsr->itr.find_snapshot = intel_bts_find_snapshot; btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; btsr->itr.reference = intel_bts_reference; - btsr->itr.read_finish = intel_bts_read_finish; + btsr->itr.read_finish = auxtrace_record__read_finish; btsr->itr.alignment = sizeof(struct branch); return &btsr->itr; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 20df442fdf36..1643aed8c4c8 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -13,23 +13,23 @@ #include <linux/zalloc.h> #include <cpuid.h> -#include "../../util/session.h" -#include "../../util/event.h" -#include "../../util/evlist.h" -#include "../../util/evsel.h" -#include "../../util/evsel_config.h" -#include "../../util/cpumap.h" -#include "../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" +#include "../../../util/cpumap.h" +#include "../../../util/mmap.h" #include <subcmd/parse-options.h> -#include "../../util/parse-events.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/target.h" -#include "../../util/tsc.h" +#include "../../../util/parse-events.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/target.h" +#include "../../../util/tsc.h" #include <internal/lib.h> // page_size -#include "../../util/intel-pt.h" +#include "../../../util/intel-pt.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) return rdtsc(); } -static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) -{ - struct intel_pt_recording *ptr = - container_of(itr, struct intel_pt_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->intel_pt_pmu->type) - return perf_evlist__enable_event_idx(ptr->evlist, evsel, - idx); - } - return -EINVAL; -} - struct auxtrace_record *intel_pt_recording_init(int *err) { struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); @@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err) } ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.pmu = intel_pt_pmu; ptr->itr.recording_options = intel_pt_recording_options; ptr->itr.info_priv_size = intel_pt_info_priv_size; ptr->itr.info_fill = intel_pt_info_fill; @@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err) ptr->itr.find_snapshot = intel_pt_find_snapshot; ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; ptr->itr.reference = intel_pt_reference; - ptr->itr.read_finish = intel_pt_read_finish; + ptr->itr.read_finish = auxtrace_record__read_finish; /* * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K * should give at least 1 PSB per sample. diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index e17e080e76f4..31679c35d493 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -5,9 +5,9 @@ #include <stdlib.h> #include <internal/lib.h> // page_size -#include "../../util/machine.h" -#include "../../util/map.h" -#include "../../util/symbol.h" +#include "../../../util/machine.h" +#include "../../../util/map.h" +#include "../../../util/symbol.h" #include <linux/ctype.h> #include <symbol/kallsyms.h> diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c218b83e063b..fca81b39b09f 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -5,10 +5,10 @@ #include <linux/kernel.h> #include <linux/zalloc.h> -#include "../../perf-sys.h" -#include "../../util/perf_regs.h" -#include "../../util/debug.h" -#include "../../util/event.h" +#include "../../../perf-sys.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index e33ef5bc31c5..d48d608517fd 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -4,9 +4,9 @@ #include <linux/stddef.h> #include <linux/perf_event.h> -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/pmu.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/pmu.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index fddb3ced9db6..4aa6de1aa67d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -2,6 +2,10 @@ #ifndef BENCH_H #define BENCH_H +#include <sys/time.h> + +extern struct timeval bench__start, bench__end, bench__runtime; + /* * The madvise transparent hugepage constants were added in glibc * 2.13. For compatibility with older versions of glibc, define these diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index bb617e568841..cadc18d42aa4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -35,7 +35,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool done, __verbose, randomize; /* @@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void nest_epollfd(void) @@ -313,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); @@ -361,7 +361,7 @@ int bench_epoll_ctl(int argc, const char **argv) threads_starting = nthreads; - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); do_threads(worker, cpu); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 7af694437f4e..f938c585d512 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -90,7 +90,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool wdone, done, __verbose, randomize |