From dd399ac9e343c7573c47d6820e4a23013c54749d Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 28 Mar 2019 11:33:53 +0000 Subject: tools/bpf: generate pkg-config file for libbpf Generate a libbpf.pc file at build time so that users can rely on pkg-config to find the library, its CFLAGS and LDFLAGS. Signed-off-by: Luca Boccassi Acked-by: Andrey Ignatov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/.gitignore | 1 + tools/lib/bpf/Makefile | 18 +++++++++++++++--- tools/lib/bpf/libbpf.pc.template | 12 ++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 tools/lib/bpf/libbpf.pc.template (limited to 'tools/lib') diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 4db74758c674..7d9e182a1f51 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,3 +1,4 @@ libbpf_version.h +libbpf.pc FEATURE-DUMP.libbpf test_libbpf diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 5bf8e52c41fc..2a578bfc0bca 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -90,6 +90,7 @@ LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) LIB_FILE = libbpf.a libbpf.so* +PC_FILE = libbpf.pc # Set compile option CFLAGS ifdef EXTRA_CFLAGS @@ -134,13 +135,14 @@ VERSION_SCRIPT := libbpf.map LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_TARGET) +CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -187,6 +189,12 @@ $(OUTPUT)libbpf.a: $(BPF_IN) $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ +$(OUTPUT)libbpf.pc: + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ + -e "s|@LIBDIR@|$(libdir_SQ)|" \ + -e "s|@VERSION@|$(LIBBPF_VERSION)|" \ + < libbpf.pc.template > $@ + check: check_abi check_abi: $(OUTPUT)libbpf.so @@ -223,7 +231,11 @@ install_headers: $(call do_install,libbpf.h,$(prefix)/include/bpf,644); $(call do_install,btf.h,$(prefix)/include/bpf,644); -install: install_lib +install_pkgconfig: $(PC_FILE) + $(call QUIET_INSTALL, $(PC_FILE)) \ + $(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644) + +install: install_lib install_pkgconfig ### Cleaning rules @@ -233,7 +245,7 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd *.pc LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template new file mode 100644 index 000000000000..ac17fcef2108 --- /dev/null +++ b/tools/lib/bpf/libbpf.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libbpf +Description: BPF library +Version: @VERSION@ +Libs: -L${libdir} -lbpf +Requires.private: libelf +Cflags: -I${includedir} -- cgit v1.2.3-59-g8ed1b From da11b417583ece875f862d84578259a2ab27ad86 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 1 Apr 2019 21:27:47 -0700 Subject: libbpf: teach libbpf about log_level bit 2 Allow bpf_prog_load_xattr() to specify log_level for program loading. Teach libbpf to accept log_level with bit 2 set. Increase default BPF_LOG_BUF_SIZE from 256k to 16M. There is no downside to increase it to a maximum allowed by old kernels. Existing 256k limit caused ENOSPC errors and users were not able to see verifier error which is printed at the end of the verifier log. If ENOSPC is hit, double the verifier log and try again to capture the verifier error. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- tools/lib/bpf/bpf.c | 2 +- tools/lib/bpf/bpf.h | 2 +- tools/lib/bpf/libbpf.c | 16 ++++++++++++++-- tools/lib/bpf/libbpf.h | 1 + 4 files changed, 17 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9cd015574e83..a1db869a6fda 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -223,7 +223,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, return -EINVAL; log_level = load_attr->log_level; - if (log_level > 2 || (log_level && !log_buf)) + if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) return -EINVAL; name_len = load_attr->name ? strlen(load_attr->name) : 0; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6ffdd79bea89..e2c0df7b831f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -92,7 +92,7 @@ struct bpf_load_program_attr { #define MAPS_RELAX_COMPAT 0x01 /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE (256 * 1024) +#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024) /* verifier maximum in kernels <= 5.1 */ LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 11c25d9ea431..e1e4d35cf08e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -152,6 +152,7 @@ struct bpf_program { }; } *reloc_desc; int nr_reloc; + int log_level; struct { int nr; @@ -1494,6 +1495,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; + int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; int ret; @@ -1514,21 +1516,30 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info = prog->line_info; load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; + load_attr.log_level = prog->log_level; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; - log_buf = malloc(BPF_LOG_BUF_SIZE); +retry_load: + log_buf = malloc(log_buf_size); if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); if (ret >= 0) { + if (load_attr.log_level) + pr_debug("verifier log:\n%s", log_buf); *pfd = ret; ret = 0; goto out; } + if (errno == ENOSPC) { + log_buf_size <<= 1; + free(log_buf); + goto retry_load; + } ret = -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("load bpf program failed: %s\n", cp); @@ -2938,6 +2949,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, expected_attach_type); + prog->log_level = attr->log_level; if (!first_prog) first_prog = prog; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c70785cc8ef5..531323391d07 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -314,6 +314,7 @@ struct bpf_prog_load_attr { enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; int ifindex; + int log_level; }; LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, -- cgit v1.2.3-59-g8ed1b From ff466b58055f2d28d8ddc1388af312e87a693efe Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Sat, 6 Apr 2019 22:37:34 -0700 Subject: libbpf: Ignore -Wformat-nonliteral warning vsprintf() in __base_pr() uses nonliteral format string and it breaks compilation for those who provide corresponding extra CFLAGS, e.g.: https://github.com/libbpf/libbpf/issues/27 If libbpf is built with the flags from PR: libbpf.c:68:26: error: format string is not a string literal [-Werror,-Wformat-nonliteral] return vfprintf(stderr, format, args); ^~~~~~ 1 error generated. Ignore this warning since the use case in libbpf.c is legit. Signed-off-by: Andrey Ignatov Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e1e4d35cf08e..1ebfe7943d53 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -52,6 +52,11 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +/* vsprintf() in __base_pr() uses nonliteral format string. It may break + * compilation if user enables corresponding warning. Disable it explicitly. + */ +#pragma GCC diagnostic ignored "-Wformat-nonliteral" + #define __printf(a, b) __attribute__((format(printf, a, b))) static int __base_pr(enum libbpf_print_level level, const char *format, -- cgit v1.2.3-59-g8ed1b From f8c7a4d4dc39991c1bc05847ea4378282708f935 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 9 Apr 2019 23:20:12 +0200 Subject: bpf, libbpf: refactor relocation handling Adjust the code for relocations slightly with no functional changes, so that upcoming patches that will introduce support for relocations into the .data, .rodata and .bss sections can be added independent of these changes. Signed-off-by: Joe Stringer Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 62 ++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 30 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1ebfe7943d53..6dba0f01673b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -871,20 +871,20 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.symbols = data; obj->efile.strtabidx = sh.sh_link; } - } else if ((sh.sh_type == SHT_PROGBITS) && - (sh.sh_flags & SHF_EXECINSTR) && - (data->d_size > 0)) { - if (strcmp(name, ".text") == 0) - obj->efile.text_shndx = idx; - err = bpf_object__add_program(obj, data->d_buf, - data->d_size, name, idx); - if (err) { - char errmsg[STRERR_BUFSIZE]; - char *cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); - - pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, cp); + } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { + if (sh.sh_flags & SHF_EXECINSTR) { + if (strcmp(name, ".text") == 0) + obj->efile.text_shndx = idx; + err = bpf_object__add_program(obj, data->d_buf, + data->d_size, name, idx); + if (err) { + char errmsg[STRERR_BUFSIZE]; + char *cp = libbpf_strerror_r(-err, errmsg, + sizeof(errmsg)); + + pr_warning("failed to alloc program %s (%s): %s", + name, obj->path, cp); + } } } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; @@ -1046,24 +1046,26 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__RELOC; } - /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */ - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].offset == sym.st_value) { - pr_debug("relocation: find map %zd (%s) for insn %u\n", - map_idx, maps[map_idx].name, insn_idx); - break; + if (sym.st_shndx == maps_shndx) { + /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */ + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].offset == sym.st_value) { + pr_debug("relocation: find map %zd (%s) for insn %u\n", + map_idx, maps[map_idx].name, insn_idx); + break; + } } - } - if (map_idx >= nr_maps) { - pr_warning("bpf relocation: map_idx %d large than %d\n", - (int)map_idx, (int)nr_maps - 1); - return -LIBBPF_ERRNO__RELOC; - } + if (map_idx >= nr_maps) { + pr_warning("bpf relocation: map_idx %d large than %d\n", + (int)map_idx, (int)nr_maps - 1); + return -LIBBPF_ERRNO__RELOC; + } - prog->reloc_desc[i].type = RELO_LD64; - prog->reloc_desc[i].insn_idx = insn_idx; - prog->reloc_desc[i].map_idx = map_idx; + prog->reloc_desc[i].type = RELO_LD64; + prog->reloc_desc[i].insn_idx = insn_idx; + prog->reloc_desc[i].map_idx = map_idx; + } } return 0; } @@ -1425,7 +1427,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) } insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; insns[insn_idx].imm = obj->maps[map_idx].fd; - } else { + } else if (prog->reloc_desc[i].type == RELO_CALL) { err = bpf_program__reloc_text(prog, obj, &prog->reloc_desc[i]); if (err) -- cgit v1.2.3-59-g8ed1b From d859900c4c56dc4f0f8894c92a01dad86917453e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:13 +0200 Subject: bpf, libbpf: support global data/bss/rodata sections This work adds BPF loader support for global data sections to libbpf. This allows to write BPF programs in more natural C-like way by being able to define global variables and const data. Back at LPC 2018 [0] we presented a first prototype which implemented support for global data sections by extending BPF syscall where union bpf_attr would get additional memory/size pair for each section passed during prog load in order to later add this base address into the ldimm64 instruction along with the user provided offset when accessing a variable. Consensus from LPC was that for proper upstream support, it would be more desirable to use maps instead of bpf_attr extension as this would allow for introspection of these sections as well as potential live updates of their content. This work follows this path by taking the following steps from loader side: 1) In bpf_object__elf_collect() step we pick up ".data", ".rodata", and ".bss" section information. 2) If present, in bpf_object__init_internal_map() we add maps to the obj's map array that corresponds to each of the present sections. Given section size and access properties can differ, a single entry array map is created with value size that is corresponding to the ELF section size of .data, .bss or .rodata. These internal maps are integrated into the normal map handling of libbpf such that when user traverses all obj maps, they can be differentiated from user-created ones via bpf_map__is_internal(). In later steps when we actually create these maps in the kernel via bpf_object__create_maps(), then for .data and .rodata sections their content is copied into the map through bpf_map_update_elem(). For .bss this is not necessary since array map is already zero-initialized by default. Additionally, for .rodata the map is frozen as read-only after setup, such that neither from program nor syscall side writes would be possible. 3) In bpf_program__collect_reloc() step, we record the corresponding map, insn index, and relocation type for the global data. 4) And last but not least in the actual relocation step in bpf_program__relocate(), we mark the ldimm64 instruction with src_reg = BPF_PSEUDO_MAP_VALUE where in the first imm field the map's file descriptor is stored as similarly done as in BPF_PSEUDO_MAP_FD, and in the second imm field (as ldimm64 is 2-insn wide) we store the access offset into the section. Given these maps have only single element ldimm64's off remains zero in both parts. 5) On kernel side, this special marked BPF_PSEUDO_MAP_VALUE load will then store the actual target address in order to have a 'map-lookup'-free access. That is, the actual map value base address + offset. The destination register in the verifier will then be marked as PTR_TO_MAP_VALUE, containing the fixed offset as reg->off and backing BPF map as reg->map_ptr. Meaning, it's treated as any other normal map value from verification side, only with efficient, direct value access instead of actual call to map lookup helper as in the typical case. Currently, only support for static global variables has been added, and libbpf rejects non-static global variables from loading. This can be lifted until we have proper semantics for how BPF will treat multi-object BPF loads. From BTF side, libbpf will set the value type id of the types corresponding to the ".bss", ".data" and ".rodata" names which LLVM will emit without the object name prefix. The key type will be left as zero, thus making use of the key-less BTF option in array maps. Simple example dump of program using globals vars in each section: # bpftool prog [...] 6784: sched_cls name load_static_dat tag a7e1291567277844 gpl loaded_at 2019-03-11T15:39:34+0000 uid 0 xlated 1776B jited 993B memlock 4096B map_ids 2238,2237,2235,2236,2239,2240 # bpftool map show id 2237 2237: array name test_glo.bss flags 0x0 key 4B value 64B max_entries 1 memlock 4096B # bpftool map show id 2235 2235: array name test_glo.data flags 0x0 key 4B value 64B max_entries 1 memlock 4096B # bpftool map show id 2236 2236: array name test_glo.rodata flags 0x80 key 4B value 96B max_entries 1 memlock 4096B # bpftool prog dump xlated id 6784 int load_static_data(struct __sk_buff * skb): ; int load_static_data(struct __sk_buff *skb) 0: (b7) r6 = 0 ; test_reloc(number, 0, &num0); 1: (63) *(u32 *)(r10 -4) = r6 2: (bf) r2 = r10 ; int load_static_data(struct __sk_buff *skb) 3: (07) r2 += -4 ; test_reloc(number, 0, &num0); 4: (18) r1 = map[id:2238] 6: (18) r3 = map[id:2237][0]+0 <-- direct addr in .bss area 8: (b7) r4 = 0 9: (85) call array_map_update_elem#100464 10: (b7) r1 = 1 ; test_reloc(number, 1, &num1); [...] ; test_reloc(string, 2, str2); 120: (18) r8 = map[id:2237][0]+16 <-- same here at offset +16 122: (18) r1 = map[id:2239] 124: (18) r3 = map[id:2237][0]+16 126: (b7) r4 = 0 127: (85) call array_map_update_elem#100464 128: (b7) r1 = 120 ; str1[5] = 'x'; 129: (73) *(u8 *)(r9 +5) = r1 ; test_reloc(string, 3, str1); 130: (b7) r1 = 3 131: (63) *(u32 *)(r10 -4) = r1 132: (b7) r9 = 3 133: (bf) r2 = r10 ; int load_static_data(struct __sk_buff *skb) 134: (07) r2 += -4 ; test_reloc(string, 3, str1); 135: (18) r1 = map[id:2239] 137: (18) r3 = map[id:2235][0]+16 <-- direct addr in .data area 139: (b7) r4 = 0 140: (85) call array_map_update_elem#100464 141: (b7) r1 = 111 ; __builtin_memcpy(&str2[2], "hello", sizeof("hello")); 142: (73) *(u8 *)(r8 +6) = r1 <-- further access based on .bss data 143: (b7) r1 = 108 144: (73) *(u8 *)(r8 +5) = r1 [...] For Cilium use-case in particular, this enables migrating configuration constants from Cilium daemon's generated header defines into global data sections such that expensive runtime recompilations with LLVM can be avoided altogether. Instead, the ELF file becomes effectively a "template", meaning, it is compiled only once (!) and the Cilium daemon will then rewrite relevant configuration data from the ELF's .data or .rodata sections directly instead of recompiling the program. The updated ELF is then loaded into the kernel and atomically replaces the existing program in the networking datapath. More info in [0]. Based upon recent fix in LLVM, commit c0db6b6bd444 ("[BPF] Don't fail for static variables"). [0] LPC 2018, BPF track, "ELF relocation for static data in BPF", http://vger.kernel.org/lpc-bpf2018.html#session-3 Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 2 +- tools/lib/bpf/bpf.c | 10 ++ tools/lib/bpf/bpf.h | 1 + tools/lib/bpf/libbpf.c | 342 ++++++++++++++++++++++++++++++++++++++++------- tools/lib/bpf/libbpf.h | 1 + tools/lib/bpf/libbpf.map | 6 + 6 files changed, 314 insertions(+), 48 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 2a578bfc0bca..008344507700 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 2 +BPF_EXTRAVERSION = 3 MAKEFLAGS += --no-print-directory diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a1db869a6fda..c039094ad3aa 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -429,6 +429,16 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); } +int bpf_map_freeze(int fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + + return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index e2c0df7b831f..c9d218d21453 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -117,6 +117,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); +LIBBPF_API int bpf_map_freeze(int fd); LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6dba0f01673b..f7b245fbb960 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7,6 +7,7 @@ * Copyright (C) 2015 Wang Nan * Copyright (C) 2015 Huawei Inc. * Copyright (C) 2017 Nicira, Inc. + * Copyright (C) 2019 Isovalent, Inc. */ #ifndef _GNU_SOURCE @@ -149,6 +150,7 @@ struct bpf_program { enum { RELO_LD64, RELO_CALL, + RELO_DATA, } type; int insn_idx; union { @@ -182,6 +184,19 @@ struct bpf_program { __u32 line_info_cnt; }; +enum libbpf_map_type { + LIBBPF_MAP_UNSPEC, + LIBBPF_MAP_DATA, + LIBBPF_MAP_BSS, + LIBBPF_MAP_RODATA, +}; + +static const char * const libbpf_type_to_btf_name[] = { + [LIBBPF_MAP_DATA] = ".data", + [LIBBPF_MAP_BSS] = ".bss", + [LIBBPF_MAP_RODATA] = ".rodata", +}; + struct bpf_map { int fd; char *name; @@ -193,11 +208,18 @@ struct bpf_map { __u32 btf_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; + enum libbpf_map_type libbpf_type; +}; + +struct bpf_secdata { + void *rodata; + void *data; }; static LIST_HEAD(bpf_objects_list); struct bpf_object { + char name[BPF_OBJ_NAME_LEN]; char license[64]; __u32 kern_version; @@ -205,6 +227,7 @@ struct bpf_object { size_t nr_programs; struct bpf_map *maps; size_t nr_maps; + struct bpf_secdata sections; bool loaded; bool has_pseudo_calls; @@ -220,6 +243,9 @@ struct bpf_object { Elf *elf; GElf_Ehdr ehdr; Elf_Data *symbols; + Elf_Data *data; + Elf_Data *rodata; + Elf_Data *bss; size_t strtabidx; struct { GElf_Shdr shdr; @@ -228,6 +254,9 @@ struct bpf_object { int nr_reloc; int maps_shndx; int text_shndx; + int data_shndx; + int rodata_shndx; + int bss_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -449,6 +478,7 @@ static struct bpf_object *bpf_object__new(const char *path, size_t obj_buf_sz) { struct bpf_object *obj; + char *end; obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); if (!obj) { @@ -457,8 +487,14 @@ static struct bpf_object *bpf_object__new(const char *path, } strcpy(obj->path, path); - obj->efile.fd = -1; + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); + end = strchr(obj->name, '.'); + if (end) + *end = 0; + obj->efile.fd = -1; /* * Caller of this function should also calls * bpf_object__elf_finish() after data collection to return @@ -468,6 +504,9 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.maps_shndx = -1; + obj->efile.data_shndx = -1; + obj->efile.rodata_shndx = -1; + obj->efile.bss_shndx = -1; obj->loaded = false; @@ -486,6 +525,9 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.elf = NULL; } obj->efile.symbols = NULL; + obj->efile.data = NULL; + obj->efile.rodata = NULL; + obj->efile.bss = NULL; zfree(&obj->efile.reloc); obj->efile.nr_reloc = 0; @@ -627,27 +669,76 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } +static bool bpf_object__has_maps(const struct bpf_object *obj) +{ + return obj->efile.maps_shndx >= 0 || + obj->efile.data_shndx >= 0 || + obj->efile.rodata_shndx >= 0 || + obj->efile.bss_shndx >= 0; +} + +static int +bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, + enum libbpf_map_type type, Elf_Data *data, + void **data_buff) +{ + struct bpf_map_def *def = &map->def; + char map_name[BPF_OBJ_NAME_LEN]; + + map->libbpf_type = type; + map->offset = ~(typeof(map->offset))0; + snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, + libbpf_type_to_btf_name[type]); + map->name = strdup(map_name); + if (!map->name) { + pr_warning("failed to alloc map name\n"); + return -ENOMEM; + } + + def->type = BPF_MAP_TYPE_ARRAY; + def->key_size = sizeof(int); + def->value_size = data->d_size; + def->max_entries = 1; + def->map_flags = type == LIBBPF_MAP_RODATA ? + BPF_F_RDONLY_PROG : 0; + if (data_buff) { + *data_buff = malloc(data->d_size); + if (!*data_buff) { + zfree(&map->name); + pr_warning("failed to alloc map content buffer\n"); + return -ENOMEM; + } + memcpy(*data_buff, data->d_buf, data->d_size); + } + + pr_debug("map %ld is \"%s\"\n", map - obj->maps, map->name); + return 0; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { + int i, map_idx, map_def_sz, nr_syms, nr_maps = 0, nr_maps_glob = 0; bool strict = !(flags & MAPS_RELAX_COMPAT); - int i, map_idx, map_def_sz, nr_maps = 0; - Elf_Scn *scn; - Elf_Data *data = NULL; Elf_Data *symbols = obj->efile.symbols; + Elf_Data *data = NULL; + int ret = 0; - if (obj->efile.maps_shndx < 0) - return -EINVAL; if (!symbols) return -EINVAL; + nr_syms = symbols->d_size / sizeof(GElf_Sym); - scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); - if (scn) - data = elf_getdata(scn, NULL); - if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); - return -EINVAL; + if (obj->efile.maps_shndx >= 0) { + Elf_Scn *scn = elf_getscn(obj->efile.elf, + obj->efile.maps_shndx); + + if (scn) + data = elf_getdata(scn, NULL); + if (!scn || !data) { + pr_warning("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); + return -EINVAL; + } } /* @@ -657,7 +748,13 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) * * TODO: Detect array of map and report error. */ - for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + if (obj->efile.data_shndx >= 0) + nr_maps_glob++; + if (obj->efile.rodata_shndx >= 0) + nr_maps_glob++; + if (obj->efile.bss_shndx >= 0) + nr_maps_glob++; + for (i = 0; data && i < nr_syms; i++) { GElf_Sym sym; if (!gelf_getsym(symbols, i, &sym)) @@ -670,19 +767,21 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) /* Alloc obj->maps and fill nr_maps. */ pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, nr_maps, data->d_size); - - if (!nr_maps) + if (!nr_maps && !nr_maps_glob) return 0; /* Assume equally sized map definitions */ - map_def_sz = data->d_size / nr_maps; - if (!data->d_size || (data->d_size % nr_maps) != 0) { - pr_warning("unable to determine map definition size " - "section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); - return -EINVAL; + if (data) { + map_def_sz = data->d_size / nr_maps; + if (!data->d_size || (data->d_size % nr_maps) != 0) { + pr_warning("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); + return -EINVAL; + } } + nr_maps += nr_maps_glob; obj->maps = calloc(nr_maps, sizeof(obj->maps[0])); if (!obj->maps) { pr_warning("alloc maps for object failed\n"); @@ -703,7 +802,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) /* * Fill obj->maps using data in "maps" section. */ - for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + for (i = 0, map_idx = 0; data && i < nr_syms; i++) { GElf_Sym sym; const char *map_name; struct bpf_map_def *def; @@ -716,6 +815,8 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); + + obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC; obj->maps[map_idx].offset = sym.st_value; if (sym.st_value + map_def_sz > data->d_size) { pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", @@ -764,8 +865,27 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_idx++; } - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); - return 0; + /* + * Populate rest of obj->maps with libbpf internal maps. + */ + if (obj->efile.data_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_DATA, + obj->efile.data, + &obj->sections.data); + if (!ret && obj->efile.rodata_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_RODATA, + obj->efile.rodata, + &obj->sections.rodata); + if (!ret && obj->efile.bss_shndx >= 0) + ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], + LIBBPF_MAP_BSS, + obj->efile.bss, NULL); + if (!ret) + qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), + compare_bpf_map); + return ret; } static bool section_have_execinstr(struct bpf_object *obj, int idx) @@ -885,6 +1005,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("failed to alloc program %s (%s): %s", name, obj->path, cp); } + } else if (strcmp(name, ".data") == 0) { + obj->efile.data = data; + obj->efile.data_shndx = idx; + } else if (strcmp(name, ".rodata") == 0) { + obj->efile.rodata = data; + obj->efile.rodata_shndx = idx; + } else { + pr_debug("skip section(%d) %s\n", idx, name); } } else if (sh.sh_type == SHT_REL) { void *reloc = obj->efile.reloc; @@ -912,6 +1040,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.reloc[n].shdr = sh; obj->efile.reloc[n].data = data; } + } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + obj->efile.bss = data; + obj->efile.bss_shndx = idx; } else { pr_debug("skip section(%d) %s\n", idx, name); } @@ -938,7 +1069,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) } } } - if (obj->efile.maps_shndx >= 0) { + if (bpf_object__has_maps(obj)) { err = bpf_object__init_maps(obj, flags); if (err) goto out; @@ -974,13 +1105,46 @@ bpf_object__find_program_by_title(struct bpf_object *obj, const char *title) return NULL; } +static bool bpf_object__shndx_is_data(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.data_shndx || + shndx == obj->efile.bss_shndx || + shndx == obj->efile.rodata_shndx; +} + +static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.maps_shndx; +} + +static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, + int shndx) +{ + return shndx == obj->efile.text_shndx || + bpf_object__shndx_is_maps(obj, shndx) || + bpf_object__shndx_is_data(obj, shndx); +} + +static enum libbpf_map_type +bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) +{ + if (shndx == obj->efile.data_shndx) + return LIBBPF_MAP_DATA; + else if (shndx == obj->efile.bss_shndx) + return LIBBPF_MAP_BSS; + else if (shndx == obj->efile.rodata_shndx) + return LIBBPF_MAP_RODATA; + else + return LIBBPF_MAP_UNSPEC; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) { Elf_Data *symbols = obj->efile.symbols; - int text_shndx = obj->efile.text_shndx; - int maps_shndx = obj->efile.maps_shndx; struct bpf_map *maps = obj->maps; size_t nr_maps = obj->nr_maps; int i, nrels; @@ -1000,7 +1164,10 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GElf_Sym sym; GElf_Rel rel; unsigned int insn_idx; + unsigned int shdr_idx; struct bpf_insn *insns = prog->insns; + enum libbpf_map_type type; + const char *name; size_t map_idx; if (!gelf_getrel(data, i, &rel)) { @@ -1015,13 +1182,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } - pr_debug("relo for %lld value %lld name %d\n", + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : ""; + + pr_debug("relo for %lld value %lld name %d (\'%s\')\n", (long long) (rel.r_info >> 32), - (long long) sym.st_value, sym.st_name); + (long long) sym.st_value, sym.st_name, name); - if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) { - pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", - prog->section_name, sym.st_shndx); + shdr_idx = sym.st_shndx; + if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { + pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", + prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } @@ -1046,10 +1218,22 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__RELOC; } - if (sym.st_shndx == maps_shndx) { - /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */ + if (bpf_object__shndx_is_maps(obj, shdr_idx) || + bpf_object__shndx_is_data(obj, shdr_idx)) { + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + if (type != LIBBPF_MAP_UNSPEC && + GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { + pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", + name, insn_idx, insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].offset == sym.st_value) { + if (maps[map_idx].libbpf_type != type) + continue; + if (type != LIBBPF_MAP_UNSPEC || + (type == LIBBPF_MAP_UNSPEC && + maps[map_idx].offset == sym.st_value)) { pr_debug("relocation: find map %zd (%s) for insn %u\n", map_idx, maps[map_idx].name, insn_idx); break; @@ -1062,7 +1246,8 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, return -LIBBPF_ERRNO__RELOC; } - prog->reloc_desc[i].type = RELO_LD64; + prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ? + RELO_DATA : RELO_LD64; prog->reloc_desc[i].insn_idx = insn_idx; prog->reloc_desc[i].map_idx = map_idx; } @@ -1073,18 +1258,27 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { struct bpf_map_def *def = &map->def; - __u32 key_type_id, value_type_id; + __u32 key_type_id = 0, value_type_id = 0; int ret; - ret = btf__get_map_kv_tids(btf, map->name, def->key_size, - def->value_size, &key_type_id, - &value_type_id); - if (ret) + if (!bpf_map__is_internal(map)) { + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + } else { + /* + * LLVM annotates global data differently in BTF, that is, + * only as '.data', '.bss' or '.rodata'. + */ + ret = btf__find_by_name(btf, + libbpf_type_to_btf_name[map->libbpf_type]); + } + if (ret < 0) return ret; map->btf_key_type_id = key_type_id; - map->btf_value_type_id = value_type_id; - + map->btf_value_type_id = bpf_map__is_internal(map) ? + ret : value_type_id; return 0; } @@ -1195,6 +1389,34 @@ bpf_object__probe_caps(struct bpf_object *obj) return bpf_object__probe_name(obj); } +static int +bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, zero = 0; + __u8 *data; + + /* Nothing to do here since kernel already zero-initializes .bss map. */ + if (map->libbpf_type == LIBBPF_MAP_BSS) + return 0; + + data = map->libbpf_type == LIBBPF_MAP_DATA ? + obj->sections.data : obj->sections.rodata; + + err = bpf_map_update_elem(map->fd, &zero, data, 0); + /* Freeze .rodata map as read-only from syscall side. */ + if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + err = bpf_map_freeze(map->fd); + if (err) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error freezing map(%s) as read-only: %s\n", + map->name, cp); + err = 0; + } + } + return err; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -1252,6 +1474,7 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; +err_out: cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", map->name, cp); @@ -1259,6 +1482,15 @@ bpf_object__create_maps(struct bpf_object *obj) zclose(obj->maps[j].fd); return err; } + + if (bpf_map__is_internal(map)) { + err = bpf_object__populate_internal_map(obj, map); + if (err < 0) { + zclose(*pfd); + goto err_out; + } + } + pr_debug("create map %s: fd=%d\n", map->name, *pfd); } @@ -1413,19 +1645,27 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) return 0; for (i = 0; i < prog->nr_reloc; i++) { - if (prog->reloc_desc[i].type == RELO_LD64) { + if (prog->reloc_desc[i].type == RELO_LD64 || + prog->reloc_desc[i].type == RELO_DATA) { + bool relo_data = prog->reloc_desc[i].type == RELO_DATA; struct bpf_insn *insns = prog->insns; int insn_idx, map_idx; insn_idx = prog->reloc_desc[i].insn_idx; map_idx = prog->reloc_desc[i].map_idx; - if (insn_idx >= (int)prog->insns_cnt) { + if (insn_idx + 1 >= (int)prog->insns_cnt) { pr_warning("relocation out of range: '%s'\n", prog->section_name); return -LIBBPF_ERRNO__RELOC; } - insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + + if (!relo_data) { + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + } else { + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE; + insns[insn_idx + 1].imm = insns[insn_idx].imm; + } insns[insn_idx].imm = obj->maps[map_idx].fd; } else if (prog->reloc_desc[i].type == RELO_CALL) { err = bpf_program__reloc_text(prog, obj, @@ -2321,6 +2561,9 @@ void bpf_object__close(struct bpf_object *obj) obj->maps[i].priv = NULL; obj->maps[i].clear_priv = NULL; } + + zfree(&obj->sections.rodata); + zfree(&obj->sections.data); zfree(&obj->maps); obj->nr_maps = 0; @@ -2798,6 +3041,11 @@ bool bpf_map__is_offload_neutral(struct bpf_map *map) return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; } +bool bpf_map__is_internal(struct bpf_map *map) +{ + return map->libbpf_type != LIBBPF_MAP_UNSPEC; +} + void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { map->map_ifindex = ifindex; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 531323391d07..12db2822c8e7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -301,6 +301,7 @@ LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); +LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f3ce50500cf2..be42bdffc8de 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -157,3 +157,9 @@ LIBBPF_0.0.2 { bpf_program__bpil_addr_to_offs; bpf_program__bpil_offs_to_addr; } LIBBPF_0.0.1; + +LIBBPF_0.0.3 { + global: + bpf_map__is_internal; + bpf_map_freeze; +} LIBBPF_0.0.2; -- cgit v1.2.3-59-g8ed1b From 1713d68b3bf039d029afd74653c9325f5003ccbe Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Apr 2019 23:20:14 +0200 Subject: bpf, libbpf: add support for BTF Var and DataSec This adds libbpf support for BTF Var and DataSec kinds. Main point here is that libbpf needs to do some preparatory work before the whole BTF object can be loaded into the kernel, that is, fixing up of DataSec size taken from the ELF section size and non-static variable offset which needs to be taken from the ELF's string section. Upstream LLVM doesn't fix these up since at time of BTF emission it is too early in the compilation process thus this information isn't available yet, hence loader needs to take care of it. Note, deduplication handling has not been in the scope of this work and needs to be addressed in a future commit. Signed-off-by: Daniel Borkmann Link: https://reviews.llvm.org/D59441 Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 97 +++++++++++++++++++++++++++++- tools/lib/bpf/btf.h | 3 + tools/lib/bpf/libbpf.c | 150 +++++++++++++++++++++++++++++++++++++++++------ tools/lib/bpf/libbpf.h | 4 ++ tools/lib/bpf/libbpf.map | 1 + 5 files changed, 235 insertions(+), 20 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 87e3020ac1bc..0f9079b9539e 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -24,6 +24,8 @@ ((k) == BTF_KIND_CONST) || \ ((k) == BTF_KIND_RESTRICT)) +#define IS_VAR(k) ((k) == BTF_KIND_VAR) + static struct btf_type btf_void; struct btf { @@ -212,6 +214,10 @@ static int btf_type_size(struct btf_type *t) return base_size + vlen * sizeof(struct btf_member); case BTF_KIND_FUNC_PROTO: return base_size + vlen * sizeof(struct btf_param); + case BTF_KIND_VAR: + return base_size + sizeof(struct btf_var); + case BTF_KIND_DATASEC: + return base_size + vlen * sizeof(struct btf_var_secinfo); default: pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); return -EINVAL; @@ -283,6 +289,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: + case BTF_KIND_DATASEC: size = t->size; goto done; case BTF_KIND_PTR: @@ -292,6 +299,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -326,7 +334,8 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); while (depth < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t) && - IS_MODIFIER(BTF_INFO_KIND(t->info))) { + (IS_MODIFIER(BTF_INFO_KIND(t->info)) || + IS_VAR(BTF_INFO_KIND(t->info)))) { type_id = t->type; t = btf__type_by_id(btf, type_id); depth++; @@ -408,6 +417,92 @@ done: return btf; } +static int compare_vsi_off(const void *_a, const void *_b) +{ + const struct btf_var_secinfo *a = _a; + const struct btf_var_secinfo *b = _b; + + return a->offset - b->offset; +} + +static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, + struct btf_type *t) +{ + __u32 size = 0, off = 0, i, vars = BTF_INFO_VLEN(t->info); + const char *name = btf__name_by_offset(btf, t->name_off); + const struct btf_type *t_var; + struct btf_var_secinfo *vsi; + struct btf_var *var; + int ret; + + if (!name) { + pr_debug("No name found in string section for DATASEC kind.\n"); + return -ENOENT; + } + + ret = bpf_object__section_size(obj, name, &size); + if (ret || !size || (t->size && t->size != size)) { + pr_debug("Invalid size for section %s: %u bytes\n", name, size); + return -ENOENT; + } + + t->size = size; + + for (i = 0, vsi = (struct btf_var_secinfo *)(t + 1); + i < vars; i++, vsi++) { + t_var = btf__type_by_id(btf, vsi->type); + var = (struct btf_var *)(t_var + 1); + + if (BTF_INFO_KIND(t_var->info) != BTF_KIND_VAR) { + pr_debug("Non-VAR type seen in section %s\n", name); + return -EINVAL; + } + + if (var->linkage == BTF_VAR_STATIC) + continue; + + name = btf__name_by_offset(btf, t_var->name_off); + if (!name) { + pr_debug("No name found in string section for VAR kind\n"); + return -ENOENT; + } + + ret = bpf_object__variable_offset(obj, name, &off); + if (ret) { + pr_debug("No offset found in symbol table for VAR %s\n", name); + return -ENOENT; + } + + vsi->offset = off; + } + + qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); + return 0; +} + +int btf__finalize_data(struct bpf_object *obj, struct btf *btf) +{ + int err = 0; + __u32 i; + + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = btf->types[i]; + + /* Loader needs to fix up some of the things compiler + * couldn't get its hands on while emitting BTF. This + * is section size and global variable offset. We use + * the info from the ELF itself for this purpose. + */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC) { + err = btf_fixup_datasec(obj, btf, t); + if (err) + break; + } + } + + return err; +} + int btf__load(struct btf *btf) { __u32 log_buf_size = BPF_LOG_BUF_SIZE; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 28a1e1e59861..c7b399e81fce 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -21,6 +21,8 @@ struct btf; struct btf_ext; struct btf_type; +struct bpf_object; + /* * The .BTF.ext ELF section layout defined as * struct btf_ext_header @@ -57,6 +59,7 @@ struct btf_ext_header { LIBBPF_API void btf__free(struct btf *btf); LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f7b245fbb960..e2a457e7c318 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -669,6 +669,112 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) return false; } +static int bpf_object_search_section_size(const struct bpf_object *obj, + const char *name, size_t *d_size) +{ + const GElf_Ehdr *ep = &obj->efile.ehdr; + Elf *elf = obj->efile.elf; + Elf_Scn *scn = NULL; + int idx = 0; + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + const char *sec_name; + Elf_Data *data; + GElf_Shdr sh; + + idx++; + if (gelf_getshdr(scn, &sh) != &sh) { + pr_warning("failed to get section(%d) header from %s\n", + idx, obj->path); + return -EIO; + } + + sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); + if (!sec_name) { + pr_warning("failed to get section(%d) name from %s\n", + idx, obj->path); + return -EIO; + } + + if (strcmp(name, sec_name)) + continue; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warning("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); + return -EIO; + } + + *d_size = data->d_size; + return 0; + } + + return -ENOENT; +} + +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size) +{ + int ret = -ENOENT; + size_t d_size; + + *size = 0; + if (!name) { + return -EINVAL; + } else if (!strcmp(name, ".data")) { + if (obj->efile.data) + *size = obj->efile.data->d_size; + } else if (!strcmp(name, ".bss")) { + if (obj->efile.bss) + *size = obj->efile.bss->d_size; + } else if (!strcmp(name, ".rodata")) { + if (obj->efile.rodata) + *size = obj->efile.rodata->d_size; + } else { + ret = bpf_object_search_section_size(obj, name, &d_size); + if (!ret) + *size = d_size; + } + + return *size ? 0 : ret; +} + +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off) +{ + Elf_Data *symbols = obj->efile.symbols; + const char *sname; + size_t si; + + if (!name || !off) + return -EINVAL; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + GELF_ST_TYPE(sym.st_info) != STT_OBJECT) + continue; + + sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!sname) { + pr_warning("failed to get sym name string for var %s\n", + name); + return -EIO; + } + if (strcmp(name, sname) == 0) { + *off = sym.st_value; + return 0; + } + } + + return -ENOENT; +} + static bool bpf_object__has_maps(const struct bpf_object *obj) { return obj->efile.maps_shndx >= 0 || @@ -911,6 +1017,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Data *btf_ext_data = NULL; + Elf_Data *btf_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -954,32 +1061,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) (int)sh.sh_link, (unsigned long)sh.sh_flags, (int)sh.sh_type); - if (strcmp(name, "license") == 0) + if (strcmp(name, "license") == 0) { err = bpf_object__init_license(obj, data->d_buf, data->d_size); - else if (strcmp(name, "version") == 0) + } else if (strcmp(name, "version") == 0) { err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); - else if (strcmp(name, "maps") == 0) + } else if (strcmp(name, "maps") == 0) { obj->efile.maps_shndx = idx; - else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size); - if (IS_ERR(obj->btf)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_ELF_SEC, PTR_ERR(obj->btf)); - obj->btf = NULL; - continue; - } - err = btf__load(obj->btf); - if (err) { - pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n", - BTF_ELF_SEC, err); - btf__free(obj->btf); - obj->btf = NULL; - err = 0; - } + } else if (strcmp(name, BTF_ELF_SEC) == 0) { + btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { @@ -1054,6 +1147,25 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_data) { + obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); + if (IS_ERR(obj->btf)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_ELF_SEC, PTR_ERR(obj->btf)); + obj->btf = NULL; + } else { + err = btf__finalize_data(obj, obj->btf); + if (!err) + err = btf__load(obj->btf); + if (err) { + pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + err = 0; + } + } + } if (btf_ext_data) { if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 12db2822c8e7..c5ff00515ce7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -75,6 +75,10 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index be42bdffc8de..673001787cba 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -162,4 +162,5 @@ LIBBPF_0.0.3 { global: bpf_map__is_internal; bpf_map_freeze; + btf__finalize_data; } LIBBPF_0.0.2; -- cgit v1.2.3-59-g8ed1b From 69a0f9ecef22131982ba328e6b74ebb082bc0992 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Apr 2019 17:37:41 -0700 Subject: bpf, bpftool: fix a few ubsan warnings The issue is reported at https://github.com/libbpf/libbpf/issues/28. Basically, per C standard, for void *memcpy(void *dest, const void *src, size_t n) if "dest" or "src" is NULL, regardless of whether "n" is 0 or not, the result of memcpy is undefined. clang ubsan reported three such instances in bpf.c with the following pattern: memcpy(dest, 0, 0). Although in practice, no known compiler will cause issues when copy size is 0. Let us still fix the issue to silence ubsan warnings. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/bpf.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c039094ad3aa..dababce68f0b 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -79,7 +79,6 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { - __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -89,8 +88,9 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.value_size = create_attr->value_size; attr.max_entries = create_attr->max_entries; attr.map_flags = create_attr->map_flags; - memcpy(attr.map_name, create_attr->name, - min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (create_attr->name) + memcpy(attr.map_name, create_attr->name, + min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); attr.numa_node = create_attr->numa_node; attr.btf_fd = create_attr->btf_fd; attr.btf_key_type_id = create_attr->btf_key_type_id; @@ -155,7 +155,6 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { - __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -166,7 +165,9 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; - memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (name) + memcpy(attr.map_name, name, + min(strlen(name), BPF_OBJ_NAME_LEN - 1)); if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; @@ -216,7 +217,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, void *finfo = NULL, *linfo = NULL; union bpf_attr attr; __u32 log_level; - __u32 name_len; int fd; if (!load_attr || !log_buf != !log_buf_sz) @@ -226,8 +226,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, if (log_level > (4 | 2 | 1) || (log_level && !log_buf)) return -EINVAL; - name_len = load_attr->name ? strlen(load_attr->name) : 0; - memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; @@ -253,8 +251,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.line_info_rec_size = load_attr->line_info_rec_size; attr.line_info_cnt = load_attr->line_info_cnt; attr.line_info = ptr_to_u64(load_attr->line_info); - memcpy(attr.prog_name, load_attr->name, - min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (load_attr->name) + memcpy(attr.prog_name, load_attr->name, + min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) -- cgit v1.2.3-59-g8ed1b From 50bd645b3a21a374dbd0fa8273a5f4e98001fb05 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 10 Apr 2019 08:54:16 +0200 Subject: libbpf: fix crash in XDP socket part with new larger BPF_LOG_BUF_SIZE In commit da11b417583e ("libbpf: teach libbpf about log_level bit 2"), the BPF_LOG_BUF_SIZE was increased to 16M. The XDP socket part of libbpf allocated the log_buf on the stack, but for the new 16M buffer size this is not going to work. Change the code so it uses a 16K buffer instead. Fixes: da11b417583e ("libbpf: teach libbpf about log_level bit 2") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- tools/lib/bpf/xsk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 8d0078b65486..557ef8d1250d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -259,7 +259,8 @@ out_umem_alloc: static int xsk_load_xdp_prog(struct xsk_socket *xsk) { - char bpf_log_buf[BPF_LOG_BUF_SIZE]; + static const int log_buf_size = 16 * 1024; + char log_buf[log_buf_size]; int err, prog_fd; /* This is the C-program: @@ -308,10 +309,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, - "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf, - BPF_LOG_BUF_SIZE); + "LGPL-2.1 or BSD-2-Clause", 0, log_buf, + log_buf_size); if (prog_fd < 0) { - pr_warning("BPF log buffer:\n%s", bpf_log_buf); + pr_warning("BPF log buffer:\n%s", log_buf); return prog_fd; } -- cgit v1.2.3-59-g8ed1b From d5adbdd77ecc1d841af244a10958792141830d30 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 10 Apr 2019 18:36:43 -0700 Subject: libbpf: Fix build with gcc-8 Reported in [1]. With gcc 8.3.0 the following error is issued: cc -Ibpf@sta -I. -I.. -I.././include -I.././include/uapi -fdiagnostics-color=always -fsanitize=address,undefined -fno-omit-frame-pointer -pipe -D_FILE_OFFSET_BITS=64 -Wall -Winvalid-pch -Werror -g -fPIC -g -O2 -Werror -Wall -Wno-pointer-arith -Wno-sign-compare -MD -MQ 'bpf@sta/src_libbpf.c.o' -MF 'bpf@sta/src_libbpf.c.o.d' -o 'bpf@sta/src_libbpf.c.o' -c ../src/libbpf.c ../src/libbpf.c: In function 'bpf_object__elf_collect': ../src/libbpf.c:947:18: error: 'map_def_sz' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (map_def_sz <= sizeof(struct bpf_map_def)) { ~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../src/libbpf.c:827:18: note: 'map_def_sz' was declared here int i, map_idx, map_def_sz, nr_syms, nr_maps = 0, nr_maps_glob = 0; ^~~~~~~~~~ According to [2] -Wmaybe-uninitialized is enabled by -Wall. Same error is generated by clang's -Wconditional-uninitialized. [1] https://github.com/libbpf/libbpf/pull/29#issuecomment-481902601 [2] https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections") Reported-by: Evgeny Vereshchagin Signed-off-by: Andrey Ignatov Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a457e7c318..67484cf32b2e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -824,7 +824,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, static int bpf_object__init_maps(struct bpf_object *obj, int flags) { - int i, map_idx, map_def_sz, nr_syms, nr_maps = 0, nr_maps_glob = 0; + int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0; bool strict = !(flags & MAPS_RELAX_COMPAT); Elf_Data *symbols = obj->efile.symbols; Elf_Data *data = NULL; -- cgit v1.2.3-59-g8ed1b From 5e903c656b98614698a891c6e098186272cbba14 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 9 Apr 2019 11:49:10 -0700 Subject: libbpf: add support for ctx_{size, }_{in, out} in BPF_PROG_TEST_RUN Support recently introduced input/output context for test runs. We extend only bpf_prog_test_run_xattr. bpf_prog_test_run is unextendable and left as is. Signed-off-by: Stanislav Fomichev Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 7 +++++++ tools/lib/bpf/bpf.c | 5 +++++ tools/lib/bpf/bpf.h | 5 +++++ 3 files changed, 17 insertions(+) (limited to 'tools/lib') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index af1cbd951f26..31a27dd337dc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -412,6 +412,13 @@ union bpf_attr { __aligned_u64 data_out; __u32 repeat; __u32 duration; + __u32 ctx_size_in; /* input: len of ctx_in */ + __u32 ctx_size_out; /* input/output: len of ctx_out + * returns ENOSPC if ctx_out + * is too small. + */ + __aligned_u64 ctx_in; + __aligned_u64 ctx_out; } test; struct { /* anonymous struct used by BPF_*_GET_*_ID */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index dababce68f0b..955191c64b64 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -554,10 +554,15 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) attr.test.data_out = ptr_to_u64(test_attr->data_out); attr.test.data_size_in = test_attr->data_size_in; attr.test.data_size_out = test_attr->data_size_out; + attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in); + attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out); + attr.test.ctx_size_in = test_attr->ctx_size_in; + attr.test.ctx_size_out = test_attr->ctx_size_out; attr.test.repeat = test_attr->repeat; ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); test_attr->data_size_out = attr.test.data_size_out; + test_attr->ctx_size_out = attr.test.ctx_size_out; test_attr->retval = attr.test.retval; test_attr->duration = attr.test.duration; return ret; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index c9d218d21453..bc30783d1403 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -136,6 +136,11 @@ struct bpf_prog_test_run_attr { * out: length of data_out */ __u32 retval; /* out: return code of the BPF program */ __u32 duration; /* out: average per repetition in ns */ + const void *ctx_in; /* optional */ + __u32 ctx_size_in; + void *ctx_out; /* optional */ + __u32 ctx_size_out; /* in: max length of ctx_out + * out: length of cxt_out */ }; LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); -- cgit v1.2.3-59-g8ed1b From 063cc9f06ee6cac12dbc68a504bc4ff56bde790d Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 8 Mar 2019 09:15:26 -0800 Subject: libbpf: Support sysctl hook Support BPF_PROG_TYPE_CGROUP_SYSCTL program in libbpf: identifying program and attach types by section name, probe. Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 3 +++ tools/lib/bpf/libbpf_probes.c | 1 + 2 files changed, 4 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 67484cf32b2e..e5b77ad97795 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2064,6 +2064,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: return false; case BPF_PROG_TYPE_KPROBE: default: @@ -3004,6 +3005,8 @@ static const struct { BPF_CGROUP_UDP4_SENDMSG), BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG), + BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_CGROUP_SYSCTL), }; #undef BPF_PROG_SEC_IMPL diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 8c3a1c04dcb2..0f25541632e3 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -97,6 +97,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_CGROUP_SYSCTL: default: break; } -- cgit v1.2.3-59-g8ed1b From 189cf5a4a7d5e0349f3079f05a8ccf4e4a2f0c3e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 Apr 2019 16:48:07 -0700 Subject: btf: add support for VAR and DATASEC in btf_dedup() This patch adds support for VAR and DATASEC in btf_dedup(). VAR/DATASEC are never deduplicated, but they need to be processed anyway as types they refer to might need to be remapped due to deduplication and compaction. Cc: Daniel Borkmann Cc: Yonghong Song Cc: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/btf.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 701e7e28ada3..75eaf10b9e1a 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1354,8 +1354,16 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, } /* special BTF "void" type is made canonical immediately */ d->map[0] = 0; - for (i = 1; i <= btf->nr_types; i++) - d->map[i] = BTF_UNPROCESSED_ID; + for (i = 1; i <= btf->nr_types; i++) { + struct btf_type *t = d->btf->types[i]; + __u16 kind = BTF_INFO_KIND(t->info); + + /* VAR and DATASEC are never deduped and are self-canonical */ + if (kind == BTF_KIND_VAR || kind == BTF_KIND_DATASEC) + d->map[i] = i; + else + d->map[i] = BTF_UNPROCESSED_ID; + } d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); if (!d->hypot_map) { @@ -1946,6 +1954,8 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_UNION: case BTF_KIND_FUNC: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_VAR: + case BTF_KIND_DATASEC: return 0; case BTF_KIND_INT: @@ -2699,6 +2709,7 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_VAR: r = btf_dedup_remap_type_id(d, t->type); if (r < 0) return r; @@ -2753,6 +2764,20 @@ static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) break; } + case BTF_KIND_DATASEC: { + struct btf_var_secinfo *var = (struct btf_var_secinfo *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, var->type); + if (r < 0) + return r; + var->type = r; + var++; + } + break; + } + default: return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From e1d1dc4653ecdea55cb0e96844f88da62c65cd4f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 16 Apr 2019 11:47:17 -0700 Subject: libbpf: fix printf formatter for ptrdiff_t argument Using %ld for printing out value of ptrdiff_t type is not portable between 32-bit and 64-bit archs. This is causing compilation errors for libbpf on 32-bit platform (discovered as part of an effort to integrate libbpf into systemd ([0])). Proper formatter is %td, which is used in this patch. v2->v1: - add Reported-by - provide more context on how this issue was discovered [0] https://github.com/systemd/systemd/pull/12151 Reported-by: Evgeny Vereshchagin Cc: Daniel Borkmann Cc: Alexei Starovoitov Cc: Yonghong Song Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e5b77ad97795..d817bf20f3d6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -817,7 +817,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map, memcpy(*data_buff, data->d_buf, data->d_size); } - pr_debug("map %ld is \"%s\"\n", map - obj->maps, map->name); + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); return 0; } -- cgit v1.2.3-59-g8ed1b From d5e63fdd443378531fd1a67a15a720a799635d93 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 16 Apr 2019 14:58:09 +0200 Subject: libbpf: fix XDP socket ring buffer memory ordering The ring buffer code of XDP sockets is missing a memory barrier on the consumer side between the load of the data and the write that signals that it is ok for the producer to put new data into the buffer. On architectures that does not guarantee that stores are not reordered with older loads, the producer might put data into the ring before the consumer had the chance to read it. As IA does guarantee this ordering, it would only need a compiler barrier here, but there are no primitives in barrier.h for this specific case (hinder writes to be ordered before older reads) so I had to add a smp_mb() here which will translate into a run-time synch operation on IA. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/xsk.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index a497f00e2962..1b35c40dff73 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -36,6 +36,10 @@ struct name { \ DEFINE_XSK_RING(xsk_ring_prod); DEFINE_XSK_RING(xsk_ring_cons); +/* For a detailed explanation on the memory barriers associated with the + * ring, please take a look at net/xdp/xsk_queue.h. + */ + struct xsk_umem; struct xsk_socket; @@ -116,8 +120,8 @@ static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) { - /* Make sure everything has been written to the ring before signalling - * this to the kernel. + /* Make sure everything has been written to the ring before indicating + * this to the kernel by writing the producer pointer. */ smp_wmb(); @@ -144,6 +148,11 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) { + /* Make sure data has been read before indicating we are done + * with the entries by updating the consumer pointer. + */ + smp_mb(); + *cons->consumer += nb; } -- cgit v1.2.3-59-g8ed1b From a06d729646e87afc5ce06e539aecf762cc26c6e3 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 16 Apr 2019 14:58:10 +0200 Subject: libbpf: remove likely/unlikely in xsk.h This patch removes the use of likely and unlikely in xsk.h since they create a dependency on Linux headers as reported by several users. There have also been reports that the use of these decreases performance as the compiler puts the code on two different cache lines instead of on a single one. All in all, I think we are better off without them. Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/xsk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 1b35c40dff73..f264f24f06ac 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -109,7 +109,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, size_t nb, __u32 *idx) { - if (unlikely(xsk_prod_nb_free(prod, nb) < nb)) + if (xsk_prod_nb_free(prod, nb) < nb) return 0; *idx = prod->cached_prod; @@ -133,7 +133,7 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, { size_t entries = xsk_cons_nb_avail(cons, nb); - if (likely(entries > 0)) { + if (entries > 0) { /* Make sure we do not speculatively read the data before * we have received the packet buffers from the ring. */ -- cgit v1.2.3-59-g8ed1b From b7e3a28019c92ffe1f55de278c5641de33b6259a Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 16 Apr 2019 14:58:11 +0200 Subject: libbpf: remove dependency on barrier.h in xsk.h The use of smp_rmb() and smp_wmb() creates a Linux header dependency on barrier.h that is unnecessary in most parts. This patch implements the two small defines that are needed from barrier.h. As a bonus, the new implementations are faster than the default ones as they default to sfence and lfence for x86, while we only need a compiler barrier in our case. Just as it is when the same ring access code is compiled in the kernel. Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_util.h | 25 +++++++++++++++++++++++++ tools/lib/bpf/xsk.h | 7 ++++--- 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index 81ecda0cb9c9..71fbb2898b87 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -23,6 +23,31 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/* Use these barrier functions instead of smp_[rw]mb() when they are + * used in a libbpf header file. That way they can be built into the + * application that uses libbpf. + */ +#if defined(__i386__) || defined(__x86_64__) +# define libbpf_smp_rmb() asm volatile("" : : : "memory") +# define libbpf_smp_wmb() asm volatile("" : : : "memory") +# define libbpf_smp_mb() \ + asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") +#elif defined(__aarch64__) +# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +#elif defined(__arm__) +/* These are only valid for armv7 and above */ +# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") +# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +#else +# warning Architecture missing native barrier functions in libbpf_util.h. +# define libbpf_smp_rmb() __sync_synchronize() +# define libbpf_smp_wmb() __sync_synchronize() +# define libbpf_smp_mb() __sync_synchronize() +#endif + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index f264f24f06ac..2377c7a7f1b1 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -16,6 +16,7 @@ #include #include "libbpf.h" +#include "libbpf_util.h" #ifdef __cplusplus extern "C" { @@ -123,7 +124,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) /* Make sure everything has been written to the ring before indicating * this to the kernel by writing the producer pointer. */ - smp_wmb(); + libbpf_smp_wmb(); *prod->producer += nb; } @@ -137,7 +138,7 @@ static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, /* Make sure we do not speculatively read the data before * we have received the packet buffers from the ring. */ - smp_rmb(); + libbpf_smp_rmb(); *idx = cons->cached_cons; cons->cached_cons += entries; @@ -151,7 +152,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) /* Make sure data has been read before indicating we are done * with the entries by updating the consumer pointer. */ - smp_mb(); + libbpf_smp_mb(); *cons->consumer += nb; } -- cgit v1.2.3-59-g8ed1b From 2c5935f1b2b642cee8e1562396ec8a7781fc4c6d Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 16 Apr 2019 14:58:12 +0200 Subject: libbpf: optimize barrier for XDP socket rings The full memory barrier in the XDP socket rings on the consumer side between the load of the data and the store of the consumer ring is there to protect the store from being executed before the load of the data. If this was allowed to happen, the producer might overwrite the data field with a new entry before the consumer got the chance to read it. On x86, stores are guaranteed not to be reordered with older loads, so it does not need a full memory barrier here. A compile time barrier would be enough. This patch introdcues a new primitive in libbpf_util.h that implements a new barrier type (libbpf_smp_rwmb) hindering stores to be reordered with older loads. It is then used in the XDP socket ring access code in libbpf to improve performance. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_util.h | 5 +++++ tools/lib/bpf/xsk.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index 71fbb2898b87..172b707e007b 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -32,20 +32,25 @@ do { \ # define libbpf_smp_wmb() asm volatile("" : : : "memory") # define libbpf_smp_mb() \ asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc") +/* Hinders stores to be observed before older loads. */ +# define libbpf_smp_rwmb() asm volatile("" : : : "memory") #elif defined(__aarch64__) # define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory") # define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() #elif defined(__arm__) /* These are only valid for armv7 and above */ # define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory") # define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory") # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") +# define libbpf_smp_rwmb() libbpf_smp_mb() #else # warning Architecture missing native barrier functions in libbpf_util.h. # define libbpf_smp_rmb() __sync_synchronize() # define libbpf_smp_wmb() __sync_synchronize() # define libbpf_smp_mb() __sync_synchronize() +# define libbpf_smp_rwmb() __sync_synchronize() #endif #ifdef __cplusplus diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 2377c7a7f1b1..82ea71a0f3ec 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -152,7 +152,7 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) /* Make sure data has been read before indicating we are done * with the entries by updating the consumer pointer. */ - libbpf_smp_mb(); + libbpf_smp_rwmb(); *cons->consumer += nb; } -- cgit v1.2.3-59-g8ed1b From 79b1b30e4c209c2ff1ddd6559b41dba1dfc8bcd8 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 18 Apr 2019 09:21:10 +0200 Subject: libbpf: remove compile time warning from libbpf_util.h Having a helpful compile time warning in libbpf_util.h is not a good idea since all warnings are treated as errors. Change this into a comment in the code instead. Fixes: b7e3a28019c9 ("libbpf: remove dependency on barrier.h in xsk.h") Signed-off-by: Magnus Karlsson Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h index 172b707e007b..da94c4cb2e4d 100644 --- a/tools/lib/bpf/libbpf_util.h +++ b/tools/lib/bpf/libbpf_util.h @@ -46,7 +46,7 @@ do { \ # define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory") # define libbpf_smp_rwmb() libbpf_smp_mb() #else -# warning Architecture missing native barrier functions in libbpf_util.h. +/* Architecture missing native barrier functions. */ # define libbpf_smp_rmb() __sync_synchronize() # define libbpf_smp_wmb() __sync_synchronize() # define libbpf_smp_mb() __sync_synchronize() -- cgit v1.2.3-59-g8ed1b From 4519efa6f8ea343e43ade21b0189b0b295439202 Mon Sep 17 00:00:00 2001 From: "McCabe, Robert J" Date: Fri, 19 Apr 2019 18:37:20 -0500 Subject: libbpf: fix BPF_LOG_BUF_SIZE off-by-one error The BPF_PROG_LOAD condition for kernel version <= 5.1 is log->len_total > UINT_MAX >> 8 /* (16 * 1024 * 1024) - 1 */ Signed-off-by: McCabe, Robert J Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index bc30783d1403..ff2506fe6bd2 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -92,7 +92,7 @@ struct bpf_load_program_attr { #define MAPS_RELAX_COMPAT 0x01 /* Recommend log buffer size */ -#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024) /* verifier maximum in kernels <= 5.1 */ +#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz); -- cgit v1.2.3-59-g8ed1b From 8837fe5dd09bd0331b3e2d1b6e400b7fcda8963a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Apr 2019 00:45:56 +0200 Subject: bpf, libbpf: handle old kernels more graceful wrt global data sections Andrii reported a corner case where e.g. global static data is present in the BPF ELF file in form of .data/.bss/.rodata section, but without any relocations to it. Such programs could be loaded before commit d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections"), whereas afterwards if kernel lacks support then loading would fail. Add a probing mechanism which skips setting up libbpf internal maps in case of missing kernel support. In presence of relocation entries, we abort the load attempt. Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections") Reported-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 99 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 86 insertions(+), 13 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d817bf20f3d6..85315dedbde4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -126,6 +126,8 @@ static inline __u64 ptr_to_u64(const void *ptr) struct bpf_capabilities { /* v4.14: kernel support for program & map names. */ __u32 name:1; + /* v5.2: kernel support for global data sections. */ + __u32 global_data:1; }; /* @@ -854,12 +856,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) * * TODO: Detect array of map and report error. */ - if (obj->efile.data_shndx >= 0) - nr_maps_glob++; - if (obj->efile.rodata_shndx >= 0) - nr_maps_glob++; - if (obj->efile.bss_shndx >= 0) - nr_maps_glob++; + if (obj->caps.global_data) { + if (obj->efile.data_shndx >= 0) + nr_maps_glob++; + if (obj->efile.rodata_shndx >= 0) + nr_maps_glob++; + if (obj->efile.bss_shndx >= 0) + nr_maps_glob++; + } + for (i = 0; data && i < nr_syms; i++) { GElf_Sym sym; @@ -971,6 +976,9 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) map_idx++; } + if (!obj->caps.global_data) + goto finalize; + /* * Populate rest of obj->maps with libbpf internal maps. */ @@ -988,6 +996,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++], LIBBPF_MAP_BSS, obj->efile.bss, NULL); +finalize: if (!ret) qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map); @@ -1333,11 +1342,17 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, if (bpf_object__shndx_is_maps(obj, shdr_idx) || bpf_object__shndx_is_data(obj, shdr_idx)) { type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); - if (type != LIBBPF_MAP_UNSPEC && - GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { - pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", - name, insn_idx, insns[insn_idx].code); - return -LIBBPF_ERRNO__RELOC; + if (type != LIBBPF_MAP_UNSPEC) { + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { + pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", + name, insn_idx, insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } + if (!obj->caps.global_data) { + pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", + name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } } for (map_idx = 0; map_idx < nr_maps; map_idx++) { @@ -1495,10 +1510,68 @@ bpf_object__probe_name(struct bpf_object *obj) return 0; } +static int +bpf_object__probe_global_data(struct bpf_object *obj) +{ + struct bpf_load_program_attr prg_attr; + struct bpf_create_map_attr map_attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret, map; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_ARRAY; + map_attr.key_size = sizeof(int); + map_attr.value_size = 32; + map_attr.max_entries = 1; + + map = bpf_create_map_xattr(&map_attr); + if (map < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, errno); + return -errno; + } + + insns[0].imm = map; + + memset(&prg_attr, 0, sizeof(prg_attr)); + prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + prg_attr.insns = insns; + prg_attr.insns_cnt = ARRAY_SIZE(insns); + prg_attr.license = "GPL"; + + ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + if (ret >= 0) { + obj->caps.global_data = 1; + close(ret); + } + + close(map); + return 0; +} + static int bpf_object__probe_caps(struct bpf_object *obj) { - return bpf_object__probe_name(obj); + int (*probe_fn[])(struct bpf_object *obj) = { + bpf_object__probe_name, + bpf_object__probe_global_data, + }; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { + ret = probe_fn[i](obj); + if (ret < 0) + return ret; + } + + return 0; } static int @@ -2100,6 +2173,7 @@ __bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); @@ -2193,7 +2267,6 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; - CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); -- cgit v1.2.3-59-g8ed1b From 4f8827d2b61ed32133e51f6a782bb69d80c7c3d4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 24 Apr 2019 00:45:57 +0200 Subject: bpf, libbpf: fix segfault in bpf_object__init_maps' pr_debug statement Ran into it while testing; in bpf_object__init_maps() data can be NULL in the case where no map section is present. Therefore we simply cannot access data->d_size before NULL test. Move the pr_debug() where it's safe to access. Fixes: d859900c4c56 ("bpf, libbpf: support global data/bss/rodata sections") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 85315dedbde4..9052061ba7fc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -875,14 +875,14 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) nr_maps++; } - /* Alloc obj->maps and fill nr_maps. */ - pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, - nr_maps, data->d_size); if (!nr_maps && !nr_maps_glob) return 0; /* Assume equally sized map definitions */ if (data) { + pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, + nr_maps, data->d_size); + map_def_sz = data->d_size / nr_maps; if (!data->d_size || (data->d_size % nr_maps) != 0) { pr_warning("unable to determine map definition size " -- cgit v1.2.3-59-g8ed1b From 32e621e55496a0009f44fe4914cd4a23cade4984 Mon Sep 17 00:00:00 2001 From: "Daniel T. Lee" Date: Wed, 24 Apr 2019 05:24:56 +0900 Subject: libbpf: fix samples/bpf build failure due to undefined UINT32_MAX Currently, building bpf samples will cause the following error. ./tools/lib/bpf/bpf.h:132:27: error: 'UINT32_MAX' undeclared here (not in a function) .. #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ ^ ./samples/bpf/bpf_load.h:31:25: note: in expansion of macro 'BPF_LOG_BUF_SIZE' extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; ^~~~~~~~~~~~~~~~ Due to commit 4519efa6f8ea ("libbpf: fix BPF_LOG_BUF_SIZE off-by-one error") hard-coded size of BPF_LOG_BUF_SIZE has been replaced with UINT32_MAX which is defined in header. Even with this change, bpf selftests are running fine since these are built with clang and it includes header(-idirafter) from clang/6.0.0/include. (it has ) clang -I. -I./include/uapi -I../../../include/uapi -idirafter /usr/local/include -idirafter /usr/include \ -idirafter /usr/lib/llvm-6.0/lib/clang/6.0.0/include -idirafter /usr/include/x86_64-linux-gnu \ -Wno-compare-distinct-pointer-types -O2 -target bpf -emit-llvm -c progs/test_sysctl_prog.c -o - | \ llc -march=bpf -mcpu=generic -filetype=obj -o /linux/tools/testing/selftests/bpf/test_sysctl_prog.o But bpf samples are compiled with GCC, and it only searches and includes headers declared at the target file. As '#include ' hasn't been declared in tools/lib/bpf/bpf.h, it causes build failure of bpf samples. gcc -Wp,-MD,./samples/bpf/.sockex3_user.o.d -Wall -Wmissing-prototypes -Wstrict-prototypes \ -O2 -fomit-frame-pointer -std=gnu89 -I./usr/include -I./tools/lib/ -I./tools/testing/selftests/bpf/ \ -I./tools/ lib/ -I./tools/include -I./tools/perf -c -o ./samples/bpf/sockex3_user.o ./samples/bpf/sockex3_user.c; This commit add declaration of '#include ' to tools/lib/bpf/bpf.h to fix this problem. Signed-off-by: Daniel T. Lee Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index ff2506fe6bd2..9593fec75652 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef __cplusplus extern "C" { -- cgit v1.2.3-59-g8ed1b From 4635b0ae4d26f87cf68dbab6740955dd1ad67cf4 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Fri, 26 Apr 2019 11:49:50 -0700 Subject: tools: sync bpf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, and fixes up the error: enumeration value ‘BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE’ not handled in switch [-Werror=switch-enum] build errors it would otherwise cause in libbpf. Signed-off-by: Matt Mullins Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 10 +++++++++- tools/lib/bpf/libbpf.c | 1 + tools/lib/bpf/libbpf_probes.c | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 704bb69514a2..f7fa7a34a62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -168,6 +168,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_REUSEPORT, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, }; enum bpf_attach_type { @@ -1737,12 +1738,19 @@ union bpf_attr { * error if an eBPF program tries to set a callback that is not * supported in the current kernel. * - * The supported callback values that *argval* can combine are: + * *argval* is a flag array which can combine these flags: * * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) * + * Therefore, this function can be used to clear a callback flag by + * setting the appropriate bit to zero. e.g. to disable the RTO + * callback: + * + * **bpf_sock_ops_cb_flags_set(bpf_sock,** + * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)** + * * Here are some examples of where one could call such eBPF * program: * diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9052061ba7fc..11a65db4b93f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2136,6 +2136,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_CGROUP_SYSCTL: return false; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 0f25541632e3..80ee922f290c 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -93,6 +93,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_SK_MSG: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: -- cgit v1.2.3-59-g8ed1b From a19f89f3667c950ad13c1560e4abd8aa8526b6b1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 26 Apr 2019 16:39:44 -0700 Subject: bpf: Support BPF_MAP_TYPE_SK_STORAGE in bpf map probing This patch supports probing for the new BPF_MAP_TYPE_SK_STORAGE. BPF_MAP_TYPE_SK_STORAGE enforces BTF usage, so the new probe requires to create and load a BTF also. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/map.c | 1 + tools/lib/bpf/libbpf_probes.c | 74 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index e6dcb3653a77..e951d45c0131 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -46,6 +46,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", [BPF_MAP_TYPE_QUEUE] = "queue", [BPF_MAP_TYPE_STACK] = "stack", + [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 80ee922f290c..a2c64a9ce1a6 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -131,11 +132,65 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) return errno != EINVAL && errno != EOPNOTSUPP; } +static int load_btf(void) +{ +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) \ + (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) +#define BTF_MEMBER_ENC(name, type, bits_offset) \ + (name), (type), (bits_offset) + + const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l"; + /* struct bpf_spin_lock { + * int val; + * }; + * struct val { + * int cnt; + * struct bpf_spin_lock l; + * }; + */ + __u32 btf_raw_types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* struct bpf_spin_lock */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), + BTF_MEMBER_ENC(15, 1, 0), /* int val; */ + /* struct val */ /* [3] */ + BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8), + BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */ + BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */ + }; + struct btf_header btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(btf_raw_types), + .str_off = sizeof(btf_raw_types), + .str_len = sizeof(btf_str_sec), + }; + __u8 raw_btf[sizeof(struct btf_header) + sizeof(btf_raw_types) + + sizeof(btf_str_sec)]; + + memcpy(raw_btf, &btf_hdr, sizeof(btf_hdr)); + memcpy(raw_btf + sizeof(btf_hdr), btf_raw_types, sizeof(btf_raw_types)); + memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); + + return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); +} + bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) { int key_size, value_size, max_entries, map_flags; + __u32 btf_key_type_id = 0, btf_value_type_id = 0; struct bpf_create_map_attr attr = {}; - int fd = -1, fd_inner; + int fd = -1, btf_fd = -1, fd_inner; key_size = sizeof(__u32); value_size = sizeof(__u32); @@ -161,6 +216,16 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_STACK: key_size = 0; break; + case BPF_MAP_TYPE_SK_STORAGE: + btf_key_type_id = 1; + btf_value_type_id = 3; + value_size = 8; + max_entries = 0; + map_flags = BPF_F_NO_PREALLOC; + btf_fd = load_btf(); + if (btf_fd < 0) + return false; + break; case BPF_MAP_TYPE_UNSPEC: case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: @@ -206,11 +271,18 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) attr.max_entries = max_entries; attr.map_flags = map_flags; attr.map_ifindex = ifindex; + if (btf_fd >= 0) { + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_key_type_id; + attr.btf_value_type_id = btf_value_type_id; + } fd = bpf_create_map_xattr(&attr); } if (fd >= 0) close(fd); + if (btf_fd >= 0) + close(btf_fd); return fd >= 0; } -- cgit v1.2.3-59-g8ed1b From 0e6741f092979535d159d5a851f12c88bfb7cb9a Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 30 Apr 2019 14:45:35 +0200 Subject: libbpf: fix invalid munmap call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When unmapping the AF_XDP memory regions used for the rings, an invalid address was passed to the munmap() calls. Instead of passing the beginning of the memory region, the descriptor region was passed to munmap. When the userspace application tried to tear down an AF_XDP socket, the operation failed and the application would still have a reference to socket it wished to get rid of. Reported-by: William Tu Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Signed-off-by: Björn Töpel Tested-by: William Tu Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/xsk.c | 77 ++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 557ef8d1250d..78b4a4bd4a25 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -248,8 +248,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, return 0; out_mmap: - munmap(umem->fill, - off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); out_socket: close(umem->fd); out_umem_alloc: @@ -524,11 +523,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, const struct xsk_socket_config *usr_config) { + void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; socklen_t optlen; - void *map; int err; if (!umem || !xsk_ptr || !rx || !tx) @@ -594,40 +593,40 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } if (rx) { - map = xsk_mmap(NULL, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_RX_RING); - if (map == MAP_FAILED) { + rx_map = xsk_mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (rx_map == MAP_FAILED) { err = -errno; goto out_socket; } rx->mask = xsk->config.rx_size - 1; rx->size = xsk->config.rx_size; - rx->producer = map + off.rx.producer; - rx->consumer = map + off.rx.consumer; - rx->ring = map + off.rx.desc; + rx->producer = rx_map + off.rx.producer; + rx->consumer = rx_map + off.rx.consumer; + rx->ring = rx_map + off.rx.desc; } xsk->rx = rx; if (tx) { - map = xsk_mmap(NULL, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_TX_RING); - if (map == MAP_FAILED) { + tx_map = xsk_mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (tx_map == MAP_FAILED) { err = -errno; goto out_mmap_rx; } tx->mask = xsk->config.tx_size - 1; tx->size = xsk->config.tx_size; - tx->producer = map + off.tx.producer; - tx->consumer = map + off.tx.consumer; - tx->ring = map + off.tx.desc; + tx->producer = tx_map + off.tx.producer; + tx->consumer = tx_map + off.tx.consumer; + tx->ring = tx_map + off.tx.desc; tx->cached_cons = xsk->config.tx_size; } xsk->tx = tx; @@ -654,13 +653,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, out_mmap_tx: if (tx) - munmap(xsk->tx, - off.tx.desc + + munmap(tx_map, off.tx.desc + xsk->config.tx_size * sizeof(struct xdp_desc)); out_mmap_rx: if (rx) - munmap(xsk->rx, - off.rx.desc + + munmap(rx_map, off.rx.desc + xsk->config.rx_size * sizeof(struct xdp_desc)); out_socket: if (--umem->refcount) @@ -685,10 +682,12 @@ int xsk_umem__delete(struct xsk_umem *umem) optlen = sizeof(off); err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); if (!err) { - munmap(umem->fill->ring, - off.fr.desc + umem->config.fill_size * sizeof(__u64)); - munmap(umem->comp->ring, - off.cr.desc + umem->config.comp_size * sizeof(__u64)); + (void)munmap(umem->fill->ring - off.fr.desc, + off.fr.desc + + umem->config.fill_size * sizeof(__u64)); + (void)munmap(umem->comp->ring - off.cr.desc, + off.cr.desc + + umem->config.comp_size * sizeof(__u64)); } close(umem->fd); @@ -699,6 +698,7 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { + size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; socklen_t optlen; int err; @@ -711,14 +711,17 @@ void xsk_socket__delete(struct xsk_socket *xsk) optlen = sizeof(off); err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); if (!err) { - if (xsk->rx) - munmap(xsk->rx->ring, - off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc)); - if (xsk->tx) - munmap(xsk->tx->ring, - off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc)); + if (xsk->rx) { + (void)munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + (void)munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + + xsk->config.tx_size * desc_sz); + } + } xsk->umem->refcount--; -- cgit v1.2.3-59-g8ed1b From 5750902a6e9bc6adb77da8257c0e34db2bfdebb2 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 30 Apr 2019 14:45:36 +0200 Subject: libbpf: proper XSKMAP cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_map_update_elem() function, when used on an XSKMAP, will fail if not a valid AF_XDP socket is passed as value. Therefore, this is function cannot be used to clear the XSKMAP. Instead, the bpf_map_delete_elem() function should be used for that. This patch also simplifies the code by breaking up xsk_update_bpf_maps() into three smaller functions. Reported-by: William Tu Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Signed-off-by: Björn Töpel Tested-by: William Tu Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/xsk.c | 115 +++++++++++++++++++++++++++------------------------- 1 file changed, 60 insertions(+), 55 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 78b4a4bd4a25..525f1cc163b5 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -387,21 +387,17 @@ static void xsk_delete_bpf_maps(struct xsk_socket *xsk) { close(xsk->qidconf_map_fd); close(xsk->xsks_map_fd); + xsk->qidconf_map_fd = -1; + xsk->xsks_map_fd = -1; } -static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, - int xsks_value) +static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) { - bool qidconf_map_updated = false, xsks_map_updated = false; + __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); + __u32 map_len = sizeof(struct bpf_map_info); struct bpf_prog_info prog_info = {}; - __u32 prog_len = sizeof(prog_info); struct bpf_map_info map_info; - __u32 map_len = sizeof(map_info); - __u32 *map_ids; - int reset_value = 0; - __u32 num_maps; - unsigned int i; - int err; + int fd, err; err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); if (err) @@ -422,66 +418,71 @@ static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, goto out_map_ids; for (i = 0; i < prog_info.nr_map_ids; i++) { - int fd; + if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1) + break; fd = bpf_map_get_fd_by_id(map_ids[i]); - if (fd < 0) { - err = -errno; - goto out_maps; - } + if (fd < 0) + continue; err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); - if (err) - goto out_maps; + if (err) { + close(fd); + continue; + } if (!strcmp(map_info.name, "qidconf_map")) { - err = bpf_map_update_elem(fd, &xsk->queue_id, - &qidconf_value, 0); - if (err) - goto out_maps; - qidconf_map_updated = true; xsk->qidconf_map_fd = fd; - } else if (!strcmp(map_info.name, "xsks_map")) { - err = bpf_map_update_elem(fd, &xsk->queue_id, - &xsks_value, 0); - if (err) - goto out_maps; - xsks_map_updated = true; + continue; + } + + if (!strcmp(map_info.name, "xsks_map")) { xsk->xsks_map_fd = fd; + continue; } - if (qidconf_map_updated && xsks_map_updated) - break; + close(fd); } - if (!(qidconf_map_updated && xsks_map_updated)) { + err = 0; + if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) { err = -ENOENT; - goto out_maps; + xsk_delete_bpf_maps(xsk); } - err = 0; - goto out_success; - -out_maps: - if (qidconf_map_updated) - (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, - &reset_value, 0); - if (xsks_map_updated) - (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, - &reset_value, 0); -out_success: - if (qidconf_map_updated) - close(xsk->qidconf_map_fd); - if (xsks_map_updated) - close(xsk->xsks_map_fd); out_map_ids: free(map_ids); return err; } +static void xsk_clear_bpf_maps(struct xsk_socket *xsk) +{ + int qid = false; + + (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); + (void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); +} + +static int xsk_set_bpf_maps(struct xsk_socket *xsk) +{ + int qid = true, fd = xsk->fd, err; + + err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); + if (err) + goto out; + + err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0); + if (err) + goto out; + + return 0; +out: + xsk_clear_bpf_maps(xsk); + return err; +} + static int xsk_setup_xdp_prog(struct xsk_socket *xsk) { - bool prog_attached = false; __u32 prog_id = 0; int err; @@ -491,7 +492,6 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) return err; if (!prog_id) { - prog_attached = true; err = xsk_create_bpf_maps(xsk); if (err) return err; @@ -501,20 +501,21 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) goto out_maps; } else { xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + err = xsk_lookup_bpf_maps(xsk); + if (err) + goto out_load; } - err = xsk_update_bpf_maps(xsk, true, xsk->fd); + err = xsk_set_bpf_maps(xsk); if (err) goto out_load; return 0; out_load: - if (prog_attached) - close(xsk->prog_fd); + close(xsk->prog_fd); out_maps: - if (prog_attached) - xsk_delete_bpf_maps(xsk); + xsk_delete_bpf_maps(xsk); return err; } @@ -642,6 +643,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, goto out_mmap_tx; } + xsk->qidconf_map_fd = -1; + xsk->xsks_map_fd = -1; + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); if (err) @@ -706,7 +710,8 @@ void xsk_socket__delete(struct xsk_socket *xsk) if (!xsk) return; - (void)xsk_update_bpf_maps(xsk, 0, 0); + xsk_clear_bpf_maps(xsk); + xsk_delete_bpf_maps(xsk); optlen = sizeof(off); err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); -- cgit v1.2.3-59-g8ed1b From ca31ca8247e2d3807ff5fa1d1760616a2292001c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 2 May 2019 08:56:50 -0700 Subject: tools/bpf: fix perf build error with uClibc (seen on ARC) When build perf for ARC recently, there was a build failure due to lack of __NR_bpf. | Auto-detecting system features: | | ... get_cpuid: [ OFF ] | ... bpf: [ on ] | | # error __NR_bpf not defined. libbpf does not support your arch. ^~~~~ | bpf.c: In function 'sys_bpf': | bpf.c:66:17: error: '__NR_bpf' undeclared (first use in this function) | return syscall(__NR_bpf, cmd, attr, size); | ^~~~~~~~ | sys_bpf Signed-off-by: Vineet Gupta Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 955191c64b64..c4a48086dc9a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -46,6 +46,8 @@ # define __NR_bpf 349 # elif defined(__s390__) # define __NR_bpf 351 +# elif defined(__arc__) +# define __NR_bpf 280 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif -- cgit v1.2.3-59-g8ed1b From 7080da8909844602b650c8959ecd7814d2829ea5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 2 May 2019 11:33:38 -0700 Subject: libbpf: add libbpf_util.h to header install. The libbpf_util.h is used by xsk.h, so add it to the install headers. Reported-by: Ben Pfaff Signed-off-by: William Tu Acked-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c6c06bc6683c..f91639bf5650 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -230,6 +230,7 @@ install_headers: $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); install_pkgconfig: $(PC_FILE) -- cgit v1.2.3-59-g8ed1b From d24ed99b3b270c6de8f47c25d709b5f6ef7d3807 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 6 May 2019 11:24:43 +0200 Subject: libbpf: remove unnecessary cast-to-void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patches with fixes tags added a cast-to-void in the places when the return value of a function was ignored. This is not common practice in the kernel, and is therefore removed in this patch. Reported-by: Daniel Borkmann Fixes: 5750902a6e9b ("libbpf: proper XSKMAP cleanup") Fixes: 0e6741f09297 ("libbpf: fix invalid munmap call") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- tools/lib/bpf/xsk.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 525f1cc163b5..a3d1a302bc9c 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -459,8 +459,8 @@ static void xsk_clear_bpf_maps(struct xsk_socket *xsk) { int qid = false; - (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); - (void)bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); + bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0); + bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); } static int xsk_set_bpf_maps(struct xsk_socket *xsk) @@ -686,12 +686,10 @@ int xsk_umem__delete(struct xsk_umem *umem) optlen = sizeof(off); err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); if (!err) { - (void)munmap(umem->fill->ring - off.fr.desc, - off.fr.desc + - umem->config.fill_size * sizeof(__u64)); - (void)munmap(umem->comp->ring - off.cr.desc, - off.cr.desc + - umem->config.comp_size * sizeof(__u64)); + munmap(umem->fill->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); } close(umem->fd); @@ -717,14 +715,12 @@ void xsk_socket__delete(struct xsk_socket *xsk) err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); if (!err) { if (xsk->rx) { - (void)munmap(xsk->rx->ring - off.rx.desc, - off.rx.desc + - xsk->config.rx_size * desc_sz); + munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + xsk->config.rx_size * desc_sz); } if (xsk->tx) { - (void)munmap(xsk->tx->ring - off.tx.desc, - off.tx.desc + - xsk->config.tx_size * desc_sz); + munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + xsk->config.tx_size * desc_sz); } } -- cgit v1.2.3-59-g8ed1b