Diffstat (limited to 'tools')
44 files changed, 2847 insertions, 255 deletions
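The centerpiece of this series is the new bpf_program__attach_uprobe_multi() API, which attaches one BPF program to many user-space functions through a single multi-uprobe link. Before the diff itself, here is a minimal usage sketch of the API as added below; "prog" and the symbol names are illustrative placeholders, not taken from the patch:

#include <bpf/libbpf.h>

/* Minimal sketch, assuming "prog" is a loaded program of type kprobe and
 * the symbol names exist in the target binary (placeholders here). */
static int attach_many(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
	const char *syms[] = { "func_a", "func_b", "func_c" };	/* placeholders */
	__u64 cookies[] = { 1, 2, 3 };	/* optional per-symbol BPF cookies */
	struct bpf_link *link;

	opts.syms = syms;
	opts.cookies = cookies;
	opts.cnt = 3;
	/* opts.retprobe = true; would create uretprobes instead */

	/* pid -1 means all processes; func_pattern is NULL since syms are set */
	link = bpf_program__attach_uprobe_multi(prog, -1, "/proc/self/exe",
						NULL, &opts);
	return libbpf_get_error(link);
}

Alternatively, a glob func_pattern (resolved through the new elf_resolve_pattern_offsets() helper below) can be passed instead of the syms/offsets arrays.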
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d21deb46f49f..8790b3962e4b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1039,6 +1039,7 @@ enum bpf_attach_type { BPF_NETFILTER, BPF_TCX_INGRESS, BPF_TCX_EGRESS, + BPF_TRACE_UPROBE_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1057,6 +1058,7 @@ enum bpf_link_type { BPF_LINK_TYPE_STRUCT_OPS = 9, BPF_LINK_TYPE_NETFILTER = 10, BPF_LINK_TYPE_TCX = 11, + BPF_LINK_TYPE_UPROBE_MULTI = 12, MAX_BPF_LINK_TYPE, }; @@ -1186,7 +1188,16 @@ enum bpf_perf_event_type { /* link_create.kprobe_multi.flags used in LINK_CREATE command for * BPF_TRACE_KPROBE_MULTI attach type to create return probe. */ -#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) +enum { + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) +}; + +/* link_create.uprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. + */ +enum { + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) +}; /* link_create.netfilter.flags used in LINK_CREATE command for * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. @@ -1624,6 +1635,15 @@ union bpf_attr { }; __u64 expected_revision; } tcx; + struct { + __aligned_u64 path; + __aligned_u64 offsets; + __aligned_u64 ref_ctr_offsets; + __aligned_u64 cookies; + __u32 cnt; + __u32 flags; + __u32 pid; + } uprobe_multi; }; } link_create; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b8b0a6369363..2d0c282c8588 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o + usdt.o zip.o elf.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index c9b6b311a441..b0f1913763a3 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -767,6 +767,17 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, kprobe_multi)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_UPROBE_MULTI: + attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); + attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); + attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); + attr.link_create.uprobe_multi.offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.offsets, 0)); + attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0)); + attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0)); + attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0); + if (!OPTS_ZEROED(opts, uprobe_multi)) + return libbpf_err(-EINVAL); + break; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 044a74ffc38a..74c2887cfd24 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -393,6 +393,15 @@ struct bpf_link_create_opts { const __u64 *cookies; } kprobe_multi; struct { + __u32 flags; + __u32 cnt; + const char *path; + const unsigned long *offsets; + const unsigned long *ref_ctr_offsets; + const __u64 *cookies; + __u32 pid; + } uprobe_multi; + struct { __u64 cookie; } tracing; struct { @@ -409,7 +418,7 @@ struct bpf_link_create_opts { }; size_t :0; }; -#define bpf_link_create_opts__last_field kprobe_multi.cookies +#define bpf_link_create_opts__last_field uprobe_multi.pid LIBBPF_API int bpf_link_create(int prog_fd, 
int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c new file mode 100644 index 000000000000..9d0296c1726a --- /dev/null +++ b/tools/lib/bpf/elf.c @@ -0,0 +1,440 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#include <libelf.h> +#include <gelf.h> +#include <fcntl.h> +#include <linux/kernel.h> + +#include "libbpf_internal.h" +#include "str_error.h" + +#define STRERR_BUFSIZE 128 + +int elf_open(const char *binary_path, struct elf_fd *elf_fd) +{ + char errmsg[STRERR_BUFSIZE]; + int fd, ret; + Elf *elf; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("elf: failed to init libelf for %s\n", binary_path); + return -LIBBPF_ERRNO__LIBELF; + } + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("elf: failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + elf_fd->fd = fd; + elf_fd->elf = elf; + return 0; +} + +void elf_close(struct elf_fd *elf_fd) +{ + if (!elf_fd) + return; + elf_end(elf_fd->elf); + close(elf_fd->fd); +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +struct elf_sym { + const char *name; + GElf_Sym sym; + GElf_Shdr sh; +}; + +struct elf_sym_iter { + Elf *elf; + Elf_Data *syms; + size_t nr_syms; + size_t strtabidx; + size_t next_sym_idx; + struct elf_sym sym; + int st_type; +}; + +static int elf_sym_iter_new(struct elf_sym_iter *iter, + Elf *elf, const char *binary_path, + int sh_type, int st_type) +{ + Elf_Scn *scn = NULL; + GElf_Ehdr ehdr; + GElf_Shdr sh; + + memset(iter, 0, sizeof(*iter)); + + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + return -EINVAL; + } + + scn = elf_find_next_scn_by_type(elf, sh_type, NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + return -ENOENT; + } + + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + + iter->strtabidx = sh.sh_link; + iter->syms = elf_getdata(scn, 0); + if (!iter->syms) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + return -EINVAL; + } + iter->nr_syms = iter->syms->d_size / sh.sh_entsize; + iter->elf = elf; + iter->st_type = st_type; + return 0; +} + +static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) +{ + struct elf_sym *ret = &iter->sym; + GElf_Sym *sym = &ret->sym; + const char *name = NULL; + Elf_Scn *sym_scn; + size_t idx; + + for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) { + if (!gelf_getsym(iter->syms, idx, sym)) + continue; + if (GELF_ST_TYPE(sym->st_info) != iter->st_type) + continue; + name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name); + if (!name) + continue; + sym_scn = elf_getscn(iter->elf, sym->st_shndx); + if (!sym_scn) + continue; + if (!gelf_getshdr(sym_scn, &ret->sh)) + continue; + + iter->next_sym_idx = idx + 1; + ret->name = name; + return ret; + } + + return NULL; +} + + +/* Transform symbol's virtual address (absolute for binaries and 
relative + * for shared libs) into file offset, which is what kernel is expecting + * for uprobe/uretprobe attachment. + * See Documentation/trace/uprobetracer.rst for more details. This is done + * by looking up symbol's containing section's header and using its virtual + * address (sh_addr) and corresponding file offset (sh_offset) to transform + * sym.st_value (virtual address) into desired final file offset. + */ +static unsigned long elf_sym_offset(struct elf_sym *sym) +{ + return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset; +} + +/* Find offset of function name in the provided ELF object. "binary_path" is + * the path to the ELF binary represented by "elf", and is only used for error + * reporting. "name" matches symbol name or name@@LIB for library + * functions. + */ +long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) +{ + int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + bool is_shared_lib, is_name_qualified; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + name_len = strlen(name); + /* Does name specify "@@LIB"? */ + is_name_qualified = strstr(name, "@@") != NULL; + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYNSYM, so absence of a section should not be + * reported as a warning/error. + */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + struct elf_sym_iter iter; + struct elf_sym *sym; + int last_bind = -1; + int cur_bind; + + ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC); + if (ret == -ENOENT) + continue; + if (ret) + goto out; + + while ((sym = elf_sym_iter_next(&iter))) { + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sym->name, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match "foo2" also, so any + * additional characters in sym->name should be of the form "@@LIB". + */ + if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + continue; + + cur_bind = GELF_ST_BIND(sym->sym.st_info); + + if (ret > 0) { + /* handle multiple matches */ + if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sym->name, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (cur_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } + + ret = elf_sym_offset(sym); + last_bind = cur_bind; + } + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + return ret; +} + +/* Find offset of function name in ELF object specified by path. "name" matches + * symbol name or name@@LIB for library functions.
+ */ +long elf_find_func_offset_from_file(const char *binary_path, const char *name) +{ + struct elf_fd elf_fd; + long ret = -ENOENT; + + ret = elf_open(binary_path, &elf_fd); + if (ret) + return ret; + ret = elf_find_func_offset(elf_fd.elf, binary_path, name); + elf_close(&elf_fd); + return ret; +} + +struct symbol { + const char *name; + int bind; + int idx; +}; + +static int symbol_cmp(const void *a, const void *b) +{ + const struct symbol *sym_a = a; + const struct symbol *sym_b = b; + + return strcmp(sym_a->name, sym_b->name); +} + +/* + * Return offsets in @poffsets for symbols specified in the @syms array. + * On success returns 0 and the offsets are returned in an allocated array + * of @cnt size, which needs to be freed by the caller. + */ +int elf_resolve_syms_offsets(const char *binary_path, int cnt, + const char **syms, unsigned long **poffsets) +{ + int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + int err = 0, i, cnt_done = 0; + unsigned long *offsets; + struct symbol *symbols; + struct elf_fd elf_fd; + + err = elf_open(binary_path, &elf_fd); + if (err) + return err; + + offsets = calloc(cnt, sizeof(*offsets)); + symbols = calloc(cnt, sizeof(*symbols)); + + if (!offsets || !symbols) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < cnt; i++) { + symbols[i].name = syms[i]; + symbols[i].idx = i; + } + + qsort(symbols, cnt, sizeof(*symbols), symbol_cmp); + + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + struct elf_sym_iter iter; + struct elf_sym *sym; + + err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC); + if (err == -ENOENT) + continue; + if (err) + goto out; + + while ((sym = elf_sym_iter_next(&iter))) { + unsigned long sym_offset = elf_sym_offset(sym); + int bind = GELF_ST_BIND(sym->sym.st_info); + struct symbol *found, tmp = { + .name = sym->name, + }; + unsigned long *offset; + + found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp); + if (!found) + continue; + + offset = &offsets[found->idx]; + if (*offset > 0) { + /* same offset, no problem */ + if (*offset == sym_offset) + continue; + /* handle multiple matches */ + if (found->bind != STB_WEAK && bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n", + sym->name, sym_offset, binary_path, *offset); + err = -ESRCH; + goto out; + } else if (bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } else { + cnt_done++; + } + *offset = sym_offset; + found->bind = bind; + } + } + + if (cnt != cnt_done) { + err = -ENOENT; + goto out; + } + + *poffsets = offsets; + +out: + free(symbols); + if (err) + free(offsets); + elf_close(&elf_fd); + return err; +} + +/* + * Return offsets in @poffsets for symbols matching the @pattern argument. + * On success returns 0 and the offsets are returned in the allocated @poffsets + * array of @pcnt size, which needs to be freed by the caller.
+ */ +int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, + unsigned long **poffsets, size_t *pcnt) +{ + int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM }; + unsigned long *offsets = NULL; + size_t cap = 0, cnt = 0; + struct elf_fd elf_fd; + int err = 0, i; + + err = elf_open(binary_path, &elf_fd); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + struct elf_sym_iter iter; + struct elf_sym *sym; + + err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC); + if (err == -ENOENT) + continue; + if (err) + goto out; + + while ((sym = elf_sym_iter_next(&iter))) { + if (!glob_match(sym->name, pattern)) + continue; + + err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets), + cnt + 1); + if (err) + goto out; + + offsets[cnt++] = elf_sym_offset(sym); + } + + /* If we found anything in the first symbol section, + * do not search others to avoid duplicates. + */ + if (cnt) + break; + } + + if (cnt) { + *poffsets = offsets; + *pcnt = cnt; + } else { + err = -ENOENT; + } + +out: + if (err) + free(offsets); + elf_close(&elf_fd); + return err; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b14a4376a86e..96ff1aa4bf6a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -120,6 +120,7 @@ static const char * const attach_type_name[] = { [BPF_NETFILTER] = "netfilter", [BPF_TCX_INGRESS] = "tcx_ingress", [BPF_TCX_EGRESS] = "tcx_egress", + [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", }; static const char * const link_type_name[] = { @@ -135,6 +136,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", [BPF_LINK_TYPE_NETFILTER] = "netfilter", [BPF_LINK_TYPE_TCX] = "tcx", + [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", }; static const char * const map_type_name[] = { @@ -365,6 +367,8 @@ enum sec_def_flags { SEC_SLEEPABLE = 8, /* BPF program support non-linear XDP buffer */ SEC_XDP_FRAGS = 16, + /* Setup proper attach type for usdt probes. 
*/ + SEC_USDT = 32, }; struct bpf_sec_def { @@ -550,6 +554,7 @@ struct extern_desc { int btf_id; int sec_btf_id; const char *name; + char *essent_name; bool is_set; bool is_weak; union { @@ -3770,6 +3775,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) struct extern_desc *ext; int i, n, off, dummy_var_btf_id; const char *ext_name, *sec_name; + size_t ext_essent_len; Elf_Scn *scn; Elf64_Shdr *sh; @@ -3819,6 +3825,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; + ext_essent_len = bpf_core_essential_name_len(ext->name); + ext->essent_name = NULL; + if (ext_essent_len != strlen(ext->name)) { + ext->essent_name = strndup(ext->name, ext_essent_len); + if (!ext->essent_name) + return -ENOMEM; + } + ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); if (ext->sec_btf_id <= 0) { pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", @@ -4817,6 +4831,39 @@ static int probe_perf_link(void) return link_fd < 0 && err == -EBADF; } +static int probe_uprobe_multi_link(void) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + static int probe_kern_bpf_cookie(void) { struct bpf_insn insns[] = { @@ -4913,6 +4960,9 @@ static struct kern_feature_desc { [FEAT_SYSCALL_WRAPPER] = { "Kernel using syscall wrapper", probe_kern_syscall_wrapper, }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -6780,6 +6830,10 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + /* special check for usdt to use uprobe_multi link */ + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) + prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; @@ -6848,7 +6902,6 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog if (!insns || !insns_cnt) return -EINVAL; - load_attr.expected_attach_type = prog->expected_attach_type; if (kernel_supports(obj, FEAT_PROG_NAME)) prog_name = prog->name; load_attr.attach_prog_fd = prog->attach_prog_fd; @@ -6884,6 +6937,9 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog insns_cnt = prog->insns_cnt; } + /* allow prog_prepare_load_fn to change expected_attach_type */ + load_attr.expected_attach_type = prog->expected_attach_type; + if (obj->gen_loader) { bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, license, insns, insns_cnt, 
&load_attr, @@ -7624,7 +7680,8 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, local_func_proto_id = ext->ksym.type_id; - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); + kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, + &mod_btf); if (kfunc_id < 0) { if (kfunc_id == -ESRCH && ext->is_weak) return 0; @@ -7639,6 +7696,9 @@ static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, kern_btf, kfunc_proto_id); if (ret <= 0) { + if (ext->is_weak) + return 0; + pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", ext->name, local_func_proto_id, mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); @@ -8316,6 +8376,21 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } +int bpf_object__unpin(struct bpf_object *obj, const char *path) +{ + int err; + + err = bpf_object__unpin_programs(obj, path); + if (err) + return libbpf_err(err); + + err = bpf_object__unpin_maps(obj, path); + if (err) + return libbpf_err(err); + + return 0; +} + static void bpf_map__destroy(struct bpf_map *map) { if (map->inner_map) { @@ -8363,6 +8438,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object_unload(obj); btf__free(obj->btf); + btf__free(obj->btf_vmlinux); btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) @@ -8370,6 +8446,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); + + for (i = 0; i < obj->nr_extern; i++) + zfree(&obj->externs[i].essent_name); + zfree(&obj->externs); obj->nr_extern = 0; @@ -8681,6 +8761,7 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8696,9 +8777,14 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), - SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), + SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), + SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 
/* alias for tcx */ SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), @@ -10549,7 +10635,7 @@ struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, } /* Adapted from perf/util/string.c */ -static bool glob_match(const char *str, const char *pat) +bool glob_match(const char *str, const char *pat) { while (*str && *pat && *pat != '*') { if (*pat == '?') { /* Matches any single character */ @@ -10902,6 +10988,37 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru return libbpf_get_error(*link); } +static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + int n, ret = -EINVAL; + + *link = NULL; + + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", + &probe_type, &binary_path, &func_name); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 3: + opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, + prog->sec_name); + break; + } + free(probe_type); + free(binary_path); + free(func_name); + return ret; +} + static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, const char *binary_path, uint64_t offset) { @@ -10984,191 +11101,6 @@ err_clean_legacy: return err; } -/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ -static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) -{ - while ((scn = elf_nextscn(elf, scn)) != NULL) { - GElf_Shdr sh; - - if (!gelf_getshdr(scn, &sh)) - continue; - if (sh.sh_type == sh_type) - return scn; - } - return NULL; -} - -/* Find offset of function name in the provided ELF object. "binary_path" is - * the path to the ELF binary represented by "elf", and only used for error - * reporting matters. "name" matches symbol name or name@@LIB for library - * functions. - */ -static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) -{ - int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; - bool is_shared_lib, is_name_qualified; - long ret = -ENOENT; - size_t name_len; - GElf_Ehdr ehdr; - - if (!gelf_getehdr(elf, &ehdr)) { - pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); - ret = -LIBBPF_ERRNO__FORMAT; - goto out; - } - /* for shared lib case, we do not need to calculate relative offset */ - is_shared_lib = ehdr.e_type == ET_DYN; - - name_len = strlen(name); - /* Does name specify "@@LIB"? */ - is_name_qualified = strstr(name, "@@") != NULL; - - /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if - * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically - * linked binary may not have SHT_DYMSYM, so absence of a section should not be - * reported as a warning/error. 
- */ - for (i = 0; i < ARRAY_SIZE(sh_types); i++) { - size_t nr_syms, strtabidx, idx; - Elf_Data *symbols = NULL; - Elf_Scn *scn = NULL; - int last_bind = -1; - const char *sname; - GElf_Shdr sh; - - scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); - if (!scn) { - pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", - binary_path); - continue; - } - if (!gelf_getshdr(scn, &sh)) - continue; - strtabidx = sh.sh_link; - symbols = elf_getdata(scn, 0); - if (!symbols) { - pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", - binary_path, elf_errmsg(-1)); - ret = -LIBBPF_ERRNO__FORMAT; - goto out; - } - nr_syms = symbols->d_size / sh.sh_entsize; - - for (idx = 0; idx < nr_syms; idx++) { - int curr_bind; - GElf_Sym sym; - Elf_Scn *sym_scn; - GElf_Shdr sym_sh; - - if (!gelf_getsym(symbols, idx, &sym)) - continue; - - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) - continue; - - sname = elf_strptr(elf, strtabidx, sym.st_name); - if (!sname) - continue; - - curr_bind = GELF_ST_BIND(sym.st_info); - - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sname, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') - continue; - - if (ret >= 0) { - /* handle multiple matches */ - if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { - /* Only accept one non-weak bind. */ - pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", - sname, name, binary_path); - ret = -LIBBPF_ERRNO__FORMAT; - goto out; - } else if (curr_bind == STB_WEAK) { - /* already have a non-weak bind, and - * this is a weak bind, so ignore. - */ - continue; - } - } - - /* Transform symbol's virtual address (absolute for - * binaries and relative for shared libs) into file - * offset, which is what kernel is expecting for - * uprobe/uretprobe attachment. - * See Documentation/trace/uprobetracer.rst for more - * details. - * This is done by looking up symbol's containing - * section's header and using it's virtual address - * (sh_addr) and corresponding file offset (sh_offset) - * to transform sym.st_value (virtual address) into - * desired final file offset. - */ - sym_scn = elf_getscn(elf, sym.st_shndx); - if (!sym_scn) - continue; - if (!gelf_getshdr(sym_scn, &sym_sh)) - continue; - - ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; - last_bind = curr_bind; - } - if (ret > 0) - break; - } - - if (ret > 0) { - pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, - ret); - } else { - if (ret == 0) { - pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, - is_shared_lib ? "should not be 0 in a shared library" : - "try using shared library path instead"); - ret = -ENOENT; - } else { - pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); - } - } -out: - return ret; -} - -/* Find offset of function name in ELF object specified by path. "name" matches - * symbol name or name@@LIB for library functions. 
- */ -static long elf_find_func_offset_from_file(const char *binary_path, const char *name) -{ - char errmsg[STRERR_BUFSIZE]; - long ret = -ENOENT; - Elf *elf; - int fd; - - fd = open(binary_path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - ret = -errno; - pr_warn("failed to open %s: %s\n", binary_path, - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); - return ret; - } - elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); - if (!elf) { - pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); - close(fd); - return -LIBBPF_ERRNO__FORMAT; - } - - ret = elf_find_func_offset(elf, binary_path, name); - elf_end(elf); - close(fd); - return ret; -} - /* Find offset of function name in archive specified by path. Currently * supported are .zip files that do not compress their contents, as used on * Android in the form of APKs, for example. "file_name" is the name of the ELF @@ -11311,6 +11243,120 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) return -ENOENT; } +struct bpf_link * +bpf_program__attach_uprobe_multi(const struct bpf_program *prog, + pid_t pid, + const char *path, + const char *func_pattern, + const struct bpf_uprobe_multi_opts *opts) +{ + const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; + LIBBPF_OPTS(bpf_link_create_opts, lopts); + unsigned long *resolved_offsets = NULL; + int err = 0, link_fd, prog_fd; + struct bpf_link *link = NULL; + char errmsg[STRERR_BUFSIZE]; + char full_path[PATH_MAX]; + const __u64 *cookies; + const char **syms; + size_t cnt; + + if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + syms = OPTS_GET(opts, syms, NULL); + offsets = OPTS_GET(opts, offsets, NULL); + ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); + cookies = OPTS_GET(opts, cookies, NULL); + cnt = OPTS_GET(opts, cnt, 0); + + /* + * User can specify two mutually exclusive sets of inputs: + * + * 1) use only path/func_pattern/pid arguments + * + * 2) use path/pid with allowed combinations of: + * syms/offsets/ref_ctr_offsets/cookies/cnt + * + * - syms and offsets are mutually exclusive + * - ref_ctr_offsets and cookies are optional + * + * Any other usage results in error. + */ + + if (!path) + return libbpf_err_ptr(-EINVAL); + if (!func_pattern && cnt == 0) + return libbpf_err_ptr(-EINVAL); + + if (func_pattern) { + if (syms || offsets || ref_ctr_offsets || cookies || cnt) + return libbpf_err_ptr(-EINVAL); + } else { + if (!!syms == !!offsets) + return libbpf_err_ptr(-EINVAL); + } + + if (func_pattern) { + if (!strchr(path, '/')) { + err = resolve_full_path(path, full_path, sizeof(full_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, path, err); + return libbpf_err_ptr(err); + } + path = full_path; + } + + err = elf_resolve_pattern_offsets(path, func_pattern, + &resolved_offsets, &cnt); + if (err < 0) + return libbpf_err_ptr(err); + offsets = resolved_offsets; + } else if (syms) { + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets); + if (err < 0) + return libbpf_err_ptr(err); + offsets = resolved_offsets; + } + + lopts.uprobe_multi.path = path; + lopts.uprobe_multi.offsets = offsets; + lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; + lopts.uprobe_multi.cookies = cookies; + lopts.uprobe_multi.cnt = cnt; + lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ?
BPF_F_UPROBE_MULTI_RETURN : 0; + + if (pid == 0) + pid = getpid(); + if (pid > 0) + lopts.uprobe_multi.pid = pid; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + prog_fd = bpf_program__fd(prog); + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", + prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto error; + } + link->fd = link_fd; + free(resolved_offsets); + return link; + +error: + free(resolved_offsets); + free(link); + return libbpf_err_ptr(err); +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 55b97b208754..0e52621cba43 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -266,6 +266,7 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); +LIBBPF_API int bpf_object__unpin(struct bpf_object *object, const char *path); LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); @@ -529,6 +530,57 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, const char *pattern, const struct bpf_kprobe_multi_opts *opts); +struct bpf_uprobe_multi_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* array of function symbols to attach to */ + const char **syms; + /* array of function offsets to attach to */ + const unsigned long *offsets; + /* optional, array of associated ref counter offsets */ + const unsigned long *ref_ctr_offsets; + /* optional, array of associated BPF cookies */ + const __u64 *cookies; + /* number of elements in syms/offsets/cookies arrays */ + size_t cnt; + /* create return uprobes */ + bool retprobe; + size_t :0; +}; + +#define bpf_uprobe_multi_opts__last_field retprobe + +/** + * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program + * to multiple uprobes with uprobe_multi link.
+ * + * User can specify two mutually exclusive sets of inputs: + * + * 1) use only path/func_pattern/pid arguments + * + * 2) use path/pid with allowed combinations of + * syms/offsets/ref_ctr_offsets/cookies/cnt + * + * - syms and offsets are mutually exclusive + * - ref_ctr_offsets and cookies are optional + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary + * @param func_pattern Glob pattern to specify functions to attach + * BPF program to + * @param opts Additional options (see **struct bpf_uprobe_multi_opts**) + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_uprobe_multi(const struct bpf_program *prog, + pid_t pid, + const char *binary_path, + const char *func_pattern, + const struct bpf_uprobe_multi_opts *opts); + struct bpf_ksyscall_opts { /* size of this struct, for forward/backward compatibility */ size_t sz; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 9c7538dd5835..57712321490f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -395,7 +395,9 @@ LIBBPF_1.2.0 { LIBBPF_1.3.0 { global: bpf_obj_pin_opts; + bpf_object__unpin; bpf_prog_detach_opts; bpf_program__attach_netfilter; bpf_program__attach_tcx; + bpf_program__attach_uprobe_multi; } LIBBPF_1.2.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index e4d05662a96c..f0f08635adb0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -15,6 +15,7 @@ #include <linux/err.h> #include <fcntl.h> #include <unistd.h> +#include <libelf.h> #include "relo_core.h" /* make sure libbpf doesn't use kernel-only integer typedefs */ @@ -354,6 +355,8 @@ enum kern_feature_id { FEAT_BTF_ENUM64, /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ FEAT_SYSCALL_WRAPPER, + /* BPF multi-uprobe link support */ + FEAT_UPROBE_MULTI_LINK, __FEAT_CNT, }; @@ -577,4 +580,22 @@ static inline bool is_pow_of_2(size_t x) #define PROG_LOAD_ATTEMPTS 5 int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); +bool glob_match(const char *str, const char *pat); + +long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name); +long elf_find_func_offset_from_file(const char *binary_path, const char *name); + +struct elf_fd { + Elf *elf; + int fd; +}; + +int elf_open(const char *binary_path, struct elf_fd *elf_fd); +void elf_close(struct elf_fd *elf_fd); + +int elf_resolve_syms_offsets(const char *binary_path, int cnt, + const char **syms, unsigned long **poffsets); +int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, + unsigned long **poffsets, size_t *pcnt); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index a26b2f5fa0fc..63a4d5ad12d1 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -776,7 +776,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, break; case BPF_CORE_FIELD_SIGNED: *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || - (btf_int_encoding(mt) & BTF_INT_SIGNED); + (btf_is_int(mt) && (btf_int_encoding(mt) & BTF_INT_SIGNED)); if (validate) *validate = true; /* signedness is never ambiguous */ break; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 37455d00b239..93794f01bb67 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -250,6 +250,7 @@
struct usdt_manager { bool has_bpf_cookie; bool has_sema_refcnt; + bool has_uprobe_multi; }; struct usdt_manager *usdt_manager_new(struct bpf_object *obj) @@ -284,6 +285,11 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj) */ man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0; + /* + * Detect kernel support for uprobe multi link to be used for attaching + * usdt probes. + */ + man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); return man; } @@ -808,6 +814,8 @@ struct bpf_link_usdt { long abs_ip; struct bpf_link *link; } *uprobes; + + struct bpf_link *multi_link; }; static int bpf_link_usdt_detach(struct bpf_link *link) @@ -816,6 +824,9 @@ static int bpf_link_usdt_detach(struct bpf_link *link) struct usdt_manager *man = usdt_link->usdt_man; int i; + bpf_link__destroy(usdt_link->multi_link); + + /* When having multi_link, uprobe_cnt is 0 */ for (i = 0; i < usdt_link->uprobe_cnt; i++) { /* detach underlying uprobe link */ bpf_link__destroy(usdt_link->uprobes[i].link); @@ -946,32 +957,24 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie) { - int i, fd, err, spec_map_fd, ip_map_fd; + unsigned long *offsets = NULL, *ref_ctr_offsets = NULL; + int i, err, spec_map_fd, ip_map_fd; LIBBPF_OPTS(bpf_uprobe_opts, opts); struct hashmap *specs_hash = NULL; struct bpf_link_usdt *link = NULL; struct usdt_target *targets = NULL; + __u64 *cookies = NULL; + struct elf_fd elf_fd; size_t target_cnt; - Elf *elf; spec_map_fd = bpf_map__fd(man->specs_map); ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = -errno; - pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); + err = elf_open(path, &elf_fd); + if (err) return libbpf_err_ptr(err); - } - - elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); - if (!elf) { - err = -EBADF; - pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); - goto err_out; - } - err = sanity_check_usdt_elf(elf, path); + err = sanity_check_usdt_elf(elf_fd.elf, path); if (err) goto err_out; @@ -984,7 +987,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct /* discover USDT in given binary, optionally limiting * activations to a given PID, if pid > 0 */ - err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, + err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, usdt_cookie, &targets, &target_cnt); if (err <= 0) { err = (err == 0) ? 
-ENOENT : err; @@ -1007,10 +1010,21 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct link->link.detach = &bpf_link_usdt_detach; link->link.dealloc = &bpf_link_usdt_dealloc; - link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); - if (!link->uprobes) { - err = -ENOMEM; - goto err_out; + if (man->has_uprobe_multi) { + offsets = calloc(target_cnt, sizeof(*offsets)); + cookies = calloc(target_cnt, sizeof(*cookies)); + ref_ctr_offsets = calloc(target_cnt, sizeof(*ref_ctr_offsets)); + + if (!offsets || !ref_ctr_offsets || !cookies) { + err = -ENOMEM; + goto err_out; + } + } else { + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); + if (!link->uprobes) { + err = -ENOMEM; + goto err_out; + } } for (i = 0; i < target_cnt; i++) { @@ -1051,37 +1065,65 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct goto err_out; } - opts.ref_ctr_offset = target->sema_off; - opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; - uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, - target->rel_ip, &opts); - err = libbpf_get_error(uprobe_link); - if (err) { - pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", - i, usdt_provider, usdt_name, path, err); + if (man->has_uprobe_multi) { + offsets[i] = target->rel_ip; + ref_ctr_offsets[i] = target->sema_off; + cookies[i] = spec_id; + } else { + opts.ref_ctr_offset = target->sema_off; + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, + target->rel_ip, &opts); + err = libbpf_get_error(uprobe_link); + if (err) { + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", + i, usdt_provider, usdt_name, path, err); + goto err_out; + } + + link->uprobes[i].link = uprobe_link; + link->uprobes[i].abs_ip = target->abs_ip; + link->uprobe_cnt++; + } + } + + if (man->has_uprobe_multi) { + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts_multi, + .ref_ctr_offsets = ref_ctr_offsets, + .offsets = offsets, + .cookies = cookies, + .cnt = target_cnt, + ); + + link->multi_link = bpf_program__attach_uprobe_multi(prog, pid, path, + NULL, &opts_multi); + if (!link->multi_link) { + err = -errno; + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n", + usdt_provider, usdt_name, path, err); goto err_out; } - link->uprobes[i].link = uprobe_link; - link->uprobes[i].abs_ip = target->abs_ip; - link->uprobe_cnt++; + free(offsets); + free(ref_ctr_offsets); + free(cookies); } free(targets); hashmap__free(specs_hash); - elf_end(elf); - close(fd); - + elf_close(&elf_fd); return &link->link; err_out: + free(offsets); + free(ref_ctr_offsets); + free(cookies); + if (link) bpf_link__destroy(&link->link); free(targets); hashmap__free(specs_hash); - if (elf) - elf_end(elf); - close(fd); + elf_close(&elf_fd); return libbpf_err_ptr(err); } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 110518ba4804..f1aebabfb017 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -44,6 +44,7 @@ test_cpp /bench /veristat /sign-file +/uprobe_multi *.ko *.tmp xskxceiver diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e4e1e6492268..edef49fcd23e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -585,6 +585,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ 
$(OUTPUT)/sign-file \ + $(OUTPUT)/uprobe_multi \ ima_setup.sh \ verify_sig_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) \ @@ -698,6 +699,10 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +$(OUTPUT)/uprobe_multi: uprobe_multi.c + $(call msg,BINARY,,$@) + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 7ff32be3d730..68180d8f8558 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -81,15 +81,6 @@ void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat); -static inline __u64 get_time_ns(void) -{ - struct timespec t; - - clock_gettime(CLOCK_MONOTONIC, &t); - - return (u64)t.tv_sec * 1000000000 + t.tv_nsec; -} - static inline void atomic_inc(long *value) { (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3b350bc31343..1c7584e8dd9e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DUMMY=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y @@ -59,6 +60,7 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=y +CONFIG_NET_SCH_FQ=y CONFIG_NET_SCH_INGRESS=y CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 26b2d1bffdfd..1454cebc262b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -11,6 +11,7 @@ #include <bpf/btf.h> #include "test_bpf_cookie.skel.h" #include "kprobe_multi.skel.h" +#include "uprobe_multi.skel.h" /* uprobe attach point */ static noinline void trigger_func(void) @@ -239,6 +240,81 @@ cleanup: bpf_link__destroy(link1); kprobe_multi__destroy(skel); } + +/* defined in prog_tests/uprobe_multi_test.c */ +void uprobe_multi_func_1(void); +void uprobe_multi_func_2(void); +void uprobe_multi_func_3(void); + +static void uprobe_multi_test_run(struct uprobe_multi *skel) +{ + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; + + skel->bss->pid = getpid(); + skel->bss->test_cookie = true; + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result"); + + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 1, "uretprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 1, "uretprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 1, "uretprobe_multi_func_3_result"); +} + +static void uprobe_multi_attach_api_subtest(void) +{ + struct bpf_link *link1 = 
NULL, *link2 = NULL; + struct uprobe_multi *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + const char *syms[3] = { + "uprobe_multi_func_1", + "uprobe_multi_func_2", + "uprobe_multi_func_3", + }; + __u64 cookies[3]; + + cookies[0] = 3; /* uprobe_multi_func_1 */ + cookies[1] = 1; /* uprobe_multi_func_2 */ + cookies[2] = 2; /* uprobe_multi_func_3 */ + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + opts.cookies = &cookies[0]; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi")) + goto cleanup; + + link1 = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1, + "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link1, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + cookies[0] = 2; /* uprobe_multi_func_1 */ + cookies[1] = 3; /* uprobe_multi_func_2 */ + cookies[2] = 1; /* uprobe_multi_func_3 */ + + opts.retprobe = true; + link2 = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, -1, + "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link2, "bpf_program__attach_uprobe_multi_retprobe")) + goto cleanup; + + uprobe_multi_test_run(skel); + +cleanup: + bpf_link__destroy(link2); + bpf_link__destroy(link1); + uprobe_multi__destroy(skel); +} + static void uprobe_subtest(struct test_bpf_cookie *skel) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); @@ -515,6 +591,8 @@ void test_bpf_cookie(void) kprobe_multi_attach_api_subtest(); if (test__start_subtest("uprobe")) uprobe_subtest(skel); + if (test__start_subtest("multi_uprobe_attach_api")) + uprobe_multi_attach_api_subtest(); if (test__start_subtest("tracepoint")) tp_subtest(skel); if (test__start_subtest("perf_event")) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 2173c4bb555e..179fe300534f 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -304,14 +304,6 @@ cleanup: kprobe_multi__destroy(skel); } -static inline __u64 get_time_ns(void) -{ - struct timespec t; - - clock_gettime(CLOCK_MONOTONIC, &t); - return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; -} - static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index 76f1da877f81..b25b870f87ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -5,6 +5,7 @@ #include <network_helpers.h> #include "local_kptr_stash.skel.h" +#include "local_kptr_stash_fail.skel.h" static void test_local_kptr_stash_simple(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -26,6 +27,27 @@ static void test_local_kptr_stash_simple(void) local_kptr_stash__destroy(skel); } +static void test_local_kptr_stash_plain(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_plain), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_plain run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_plain retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_unstash(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -51,10 +73,19 
@@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } -void test_local_kptr_stash_success(void) +static void test_local_kptr_stash_fail(void) +{ + RUN_TESTS(local_kptr_stash_fail); +} + +void test_local_kptr_stash(void) { if (test__start_subtest("local_kptr_stash_simple")) test_local_kptr_stash_simple(); + if (test__start_subtest("local_kptr_stash_plain")) + test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("local_kptr_stash_fail")) + test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h new file mode 100644 index 000000000000..61333f2a03f9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LWT_HELPERS_H +#define __LWT_HELPERS_H + +#include <time.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <linux/icmp.h> + +#include "test_progs.h" + +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) + +#define RUN_TEST(name) \ + ({ \ + if (test__start_subtest(#name)) \ + if (ASSERT_OK(netns_create(), "netns_create")) { \ + struct nstoken *token = open_netns(NETNS); \ + if (ASSERT_OK_PTR(token, "setns")) { \ + test_ ## name(); \ + close_netns(token); \ + } \ + netns_delete(); \ + } \ + }) + +#define NETNS "ns_lwt" + +static inline int netns_create(void) +{ + return system("ip netns add " NETNS); +} + +static inline int netns_delete(void) +{ + return system("ip netns del " NETNS ">/dev/null 2>&1"); +} + +static int open_tuntap(const char *dev_name, bool need_mac) +{ + int err = 0; + struct ifreq ifr; + int fd = open("/dev/net/tun", O_RDWR); + + if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) + return -1; + + ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); + memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { + close(fd); + return -1; + } + + err = fcntl(fd, F_SETFL, O_NONBLOCK); + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { + close(fd); + return -1; + } + + return fd; +} + +#define ICMP_PAYLOAD_SIZE 100 + +/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ +static int __expect_icmp_ipv4(char *buf, ssize_t len) +{ + struct iphdr *ip = (struct iphdr *)buf; + struct icmphdr *icmp = (struct icmphdr *)(ip + 1); + ssize_t min_header_len = sizeof(*ip) + sizeof(*icmp); + + if (len < min_header_len) + return -1; + + if (ip->protocol != IPPROTO_ICMP) + return -1; + + if (icmp->type != ICMP_ECHO) + return -1; + + return len == ICMP_PAYLOAD_SIZE + min_header_len; +} + +typedef int (*filter_t) (char *, ssize_t); + +/* wait_for_packet - wait for a packet that matches the filter + * + * @fd: tun fd/packet socket to read packet + * @filter: filter function, returning 1 if matches + * @timeout: timeout to wait for the packet + * + * Returns 1 if a matching packet is read, 0 if timeout expired, -1 on error. + */ +static int wait_for_packet(int fd, filter_t filter, struct timeval *timeout) +{ + char buf[4096]; + int max_retry = 5; /* in case we read some spurious packets */ + fd_set fds; + + FD_ZERO(&fds); + while (max_retry--) { + /* Linux modifies timeout arg... 
So make a copy */ + struct timeval copied_timeout = *timeout; + ssize_t ret = -1; + + FD_SET(fd, &fds); + + ret = select(1 + fd, &fds, NULL, NULL, &copied_timeout); + if (ret <= 0) { + if (errno == EINTR) + continue; + else if (errno == EAGAIN || ret == 0) + return 0; + + log_err("select failed"); + return -1; + } + + ret = read(fd, buf, sizeof(buf)); + + if (ret <= 0) { + log_err("read(dev): %ld", ret); + return -1; + } + + if (filter && filter(buf, ret) > 0) + return 1; + } + + return 0; +} + +#endif /* __LWT_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c new file mode 100644 index 000000000000..59b38569f310 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Test suite of lwt_xmit BPF programs that redirect packets. + * The tests check not only that these programs work as expected in the normal + * case, but also that they handle abnormal situations gracefully. + * + * WARNING + * ------- + * This test suite may crash the kernel, thus should be run in a VM. + * + * Setup: + * --------- + * All tests are performed in a single netns. Two lwt encap routes are set up for + * each subtest: + * + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err + * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err + * + * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section + * of this object holds a program entry to test. The BPF object is built from + * progs/test_lwt_redirect.c. We didn't use a generated BPF skeleton since + * attachment of lwt programs is not supported by libbpf yet. + * + * For testing, ping commands are run in the test netns: + * + * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100 + * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100 + * + * Scenarios: + * -------------------------------- + * 1. Redirect to a running tap/tun device + * 2. Redirect to a down tap/tun device + * 3. Redirect to a vlan device with lower layer down + * + * In case 1, ping packets should be received by a packet socket on the target + * device when redirected to ingress, and by the tun/tap fd when redirected to + * egress. + * + * Cases 2 and 3 are considered successful as long as they do not crash the + * kernel, i.e. no regression. + * + * Cases 1 and 2 use a tap device to test redirecting to a device that requires + * a MAC header, and a tun device to test the case with no MAC header. + */ +#include <sys/socket.h> +#include <net/if.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_tun.h> +#include <linux/icmp.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> + +#include "lwt_helpers.h" +#include "test_progs.h" +#include "network_helpers.h" + +#define BPF_OBJECT "test_lwt_redirect.bpf.o" +#define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac") +#define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac") +#define LOCAL_SRC "10.0.0.1" +#define CIDR_TO_INGRESS "10.0.0.0/24" +#define CIDR_TO_EGRESS "20.0.0.0/24" + +/* Ping to redirect toward the given dev, with the last byte of the dest IP + * being the target device index. + * + * Note: the ping command inside BPF CI is the busybox version, so it lacks + * certain options, such as -m to set the packet mark.
+ */
+static void ping_dev(const char *dev, bool is_ingress)
+{
+	int link_index = if_nametoindex(dev);
+	char ip[256];
+
+	if (!ASSERT_GE(link_index, 0, "if_nametoindex"))
+		return;
+
+	if (is_ingress)
+		snprintf(ip, sizeof(ip), "10.0.0.%d", link_index);
+	else
+		snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
+
+	/* We won't get a reply. Don't fail here */
+	SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
+		   ip, ICMP_PAYLOAD_SIZE);
+}
+
+static int new_packet_sock(const char *ifname)
+{
+	int err = 0;
+	int ignore_outgoing = 1;
+	int ifindex = -1;
+	int s = -1;
+
+	s = socket(AF_PACKET, SOCK_RAW, 0);
+	if (!ASSERT_GE(s, 0, "socket(AF_PACKET)"))
+		return -1;
+
+	ifindex = if_nametoindex(ifname);
+	if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) {
+		close(s);
+		return -1;
+	}
+
+	struct sockaddr_ll addr = {
+		.sll_family = AF_PACKET,
+		.sll_protocol = htons(ETH_P_IP),
+		.sll_ifindex = ifindex,
+	};
+
+	err = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+	if (!ASSERT_OK(err, "bind(AF_PACKET)")) {
+		close(s);
+		return -1;
+	}
+
+	/* Use a packet socket to capture only ingress traffic, so we can
+	 * detect the case where a regression redirects the packet to the
+	 * egress instead.
+	 */
+	err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING,
+			 &ignore_outgoing, sizeof(ignore_outgoing));
+	if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) {
+		close(s);
+		return -1;
+	}
+
+	err = fcntl(s, F_SETFL, O_NONBLOCK);
+	if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+		close(s);
+		return -1;
+	}
+
+	return s;
+}
+
+static int expect_icmp(char *buf, ssize_t len)
+{
+	struct ethhdr *eth = (struct ethhdr *)buf;
+
+	if (len < (ssize_t)sizeof(*eth))
+		return -1;
+
+	if (eth->h_proto == htons(ETH_P_IP))
+		return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth));
+
+	return -1;
+}
+
+static int expect_icmp_nomac(char *buf, ssize_t len)
+{
+	return __expect_icmp_ipv4(buf, len);
+}
+
+static void send_and_capture_test_packets(const char *test_name, int tap_fd,
+					  const char *target_dev, bool need_mac)
+{
+	int psock = -1;
+	struct timeval timeo = {
+		.tv_sec = 0,
+		.tv_usec = 250000,
+	};
+	int ret = -1;
+
+	filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac;
+
+	ping_dev(target_dev, false);
+
+	ret = wait_for_packet(tap_fd, filter, &timeo);
+	if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) {
+		log_err("%s egress test fails", test_name);
+		goto out;
+	}
+
+	psock = new_packet_sock(target_dev);
+	ping_dev(target_dev, true);
+
+	ret = wait_for_packet(psock, filter, &timeo);
+	if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) {
+		log_err("%s ingress test fails", test_name);
+		goto out;
+	}
+
+out:
+	if (psock >= 0)
+		close(psock);
+}
+
+static int setup_redirect_target(const char *target_dev, bool need_mac)
+{
+	int target_index = -1;
+	int tap_fd = -1;
+
+	tap_fd = open_tuntap(target_dev, need_mac);
+	if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+		goto fail;
+
+	target_index = if_nametoindex(target_dev);
+	if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
+		goto fail;
+
+	SYS(fail, "ip link add link_err type dummy");
+	SYS(fail, "ip link set lo up");
+	SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
+	SYS(fail, "ip link set link_err up");
+	SYS(fail, "ip link set %s up", target_dev);
+
+	SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
+	    CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac));
+
+	SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
+	    CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac));
+
+	return tap_fd;
+
+fail:
+	if (tap_fd >= 0)
+		close(tap_fd);
+	return -1;
+}
+
+static void test_lwt_redirect_normal(void)
+{
+	const char *target_dev = "tap0";
+	int tap_fd = -1;
+	bool need_mac = true;
+
+	tap_fd = setup_redirect_target(target_dev, need_mac);
+	if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+		return;
+
+	send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac);
+	close(tap_fd);
+}
+
+static void test_lwt_redirect_normal_nomac(void)
+{
+	const char *target_dev = "tun0";
+	int tap_fd = -1;
+	bool need_mac = false;
+
+	tap_fd = setup_redirect_target(target_dev, need_mac);
+	if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+		return;
+
+	send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac);
+	close(tap_fd);
+}
+
+/* This test aims to prevent future regressions. As long as the kernel does
+ * not panic, it is considered a success.
+ */
+static void __test_lwt_redirect_dev_down(bool need_mac)
+{
+	const char *target_dev = "tap0";
+	int tap_fd = -1;
+
+	tap_fd = setup_redirect_target(target_dev, need_mac);
+	if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+		return;
+
+	SYS(out, "ip link set %s down", target_dev);
+	ping_dev(target_dev, true);
+	ping_dev(target_dev, false);
+
+out:
+	close(tap_fd);
+}
+
+static void test_lwt_redirect_dev_down(void)
+{
+	__test_lwt_redirect_dev_down(true);
+}
+
+static void test_lwt_redirect_dev_down_nomac(void)
+{
+	__test_lwt_redirect_dev_down(false);
+}
+
+/* This test aims to prevent future regressions. As long as the kernel does
+ * not panic, it is considered a success.
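+ *
+ * Roughly: set up the redirect target as usual, force the target device
+ * down with "ip link set tap0 down", then ping toward both the to-ingress
+ * and to-egress routes, checking only that the kernel survives.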
+ */
+static void test_lwt_redirect_dev_carrier_down(void)
+{
+	const char *lower_dev = "tap0";
+	const char *vlan_dev = "vlan100";
+	int tap_fd = -1;
+
+	tap_fd = setup_redirect_target(lower_dev, true);
+	if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+		return;
+
+	SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev);
+	SYS(out, "ip link set %s up", vlan_dev);
+	SYS(out, "ip link set %s down", lower_dev);
+	ping_dev(vlan_dev, true);
+	ping_dev(vlan_dev, false);
+
+out:
+	close(tap_fd);
}
+
+static void *test_lwt_redirect_run(void *arg)
+{
+	netns_delete();
+	RUN_TEST(lwt_redirect_normal);
+	RUN_TEST(lwt_redirect_normal_nomac);
+	RUN_TEST(lwt_redirect_dev_down);
+	RUN_TEST(lwt_redirect_dev_down_nomac);
+	RUN_TEST(lwt_redirect_dev_carrier_down);
+	return NULL;
+}
+
+void test_lwt_redirect(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	/* Run the tests in their own thread to isolate the namespace changes
+	 * so they do not affect the environment of other tests.
+	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+	 */
+	err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL);
+	if (ASSERT_OK(err, "pthread_create"))
+		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c new file mode 100644 index 000000000000..f4bb2d5fcae0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Test suite of lwt BPF programs that reroute packets.
+ * These tests check not only that the programs work as expected in the
+ * normal case, but also that they handle abnormal situations gracefully.
+ * This suite currently covers only the lwt_xmit hook; lwt_in tests have not
+ * been implemented.
+ *
+ * WARNING
+ * -------
+ * This test suite can crash the kernel, and thus should be run in a VM.
+ *
+ * Setup:
+ * ---------
+ * All tests are performed in a single netns. An lwt encap route is set up
+ * for each subtest:
+ *
+ *  ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err
+ *
+ * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
+ * a single test program entry. This program sets the packet mark to the last
+ * byte of the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive
+ * skb mark 4. A packet is only marked once, and IP x.x.x.0 is skipped to
+ * avoid a route loop. We didn't use a generated BPF skeleton since the
+ * attachment of lwt programs is not supported by libbpf yet.
+ *
+ * The test brings up a tun device and sets up the following routes:
+ *
+ *   ip rule add pref 100 from all fwmark <tun_index> lookup 100
+ *   ip route add table 100 default dev tun0
+ *
+ * For normal testing, a ping command is run in the test netns:
+ *
+ *   ping 10.0.0.<tun_index> -c 1 -w 1 -s 100
+ *
+ * For abnormal testing, fq is used as the qdisc of the tun device. A UDP
+ * socket then tries to overflow the fq queue and trigger a qdisc drop error.
+ *
+ * Scenarios:
+ * --------------------------------
+ *  1. Reroute to a running tun device
+ *  2. Reroute to a device whose qdisc drops the packets
+ *
+ * For case 1, ping packets should be received by the tun device.
+ *
+ * For case 2, UDP packets are forced to overflow the fq limit. As long as
+ * the kernel does not crash, the test is considered successful.
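+ *
+ * As a concrete illustration (the ifindex 4 here is hypothetical): suppose
+ * tun0 has ifindex 4. A ping to 10.0.0.4 first hits the lwt route on
+ * link_err, where the BPF program sets skb->mark = 4 and returns
+ * BPF_LWT_REROUTE. The packet is then routed again: the fwmark rule directs
+ * it to table 100, whose default route delivers it to tun0.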
+ */
+#include "lwt_helpers.h"
+#include "network_helpers.h"
+#include <linux/net_tstamp.h>
+
+#define BPF_OBJECT "test_lwt_reroute.bpf.o"
+#define LOCAL_SRC "10.0.0.1"
+#define TEST_CIDR "10.0.0.0/24"
+#define XMIT_HOOK "xmit"
+#define XMIT_SECTION "lwt_xmit"
+#define NSEC_PER_SEC 1000000000ULL
+
+/* send a ping to be rerouted to the target device */
+static void ping_once(const char *ip)
+{
+	/* We won't get a reply. Don't fail here */
+	SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1",
+		   ip, ICMP_PAYLOAD_SIZE);
+}
+
+/* Send snd_target UDP packets to overflow the fq queue and trigger a qdisc
+ * drop error. This is done via TX timestamps that force fq to buffer the
+ * packets until their (future) delivery time.
+ */
+static int overflow_fq(int snd_target, const char *target_ip)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_port = htons(1234),
+	};
+
+	char data_buf[8]; /* only #pkts matter, so use a random small buffer */
+	char control_buf[CMSG_SPACE(sizeof(uint64_t))];
+	struct iovec iov = {
+		.iov_base = data_buf,
+		.iov_len = sizeof(data_buf),
+	};
+	int err = -1;
+	int s = -1;
+	struct sock_txtime txtime_on = {
+		.clockid = CLOCK_MONOTONIC,
+		.flags = 0,
+	};
+	struct msghdr msg = {
+		.msg_name = &addr,
+		.msg_namelen = sizeof(addr),
+		.msg_control = control_buf,
+		.msg_controllen = sizeof(control_buf),
+		.msg_iovlen = 1,
+		.msg_iov = &iov,
+	};
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+
+	memset(data_buf, 0, sizeof(data_buf));
+
+	s = socket(AF_INET, SOCK_DGRAM, 0);
+	if (!ASSERT_GE(s, 0, "socket"))
+		goto out;
+
+	err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
+	if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
+		goto out;
+
+	err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
+	if (!ASSERT_EQ(err, 1, "inet_pton"))
+		goto out;
+
+	while (snd_target > 0) {
+		struct timespec now;
+
+		memset(control_buf, 0, sizeof(control_buf));
+		cmsg->cmsg_type = SCM_TXTIME;
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
+
+		err = clock_gettime(CLOCK_MONOTONIC, &now);
+		if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
+			err = -1;
+			goto out;
+		}
+
+		/* set the TX timestamp ~1s in the future */
+		*(uint64_t *)CMSG_DATA(cmsg) = (now.tv_sec + 1) * NSEC_PER_SEC +
+					       now.tv_nsec;
+
+		/* we intentionally send more than the fq limit, so ignore
+		 * the error here.
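+		 *
+		 * (Each packet carries an SCM_TXTIME cmsg with a
+		 * CLOCK_MONOTONIC timestamp about one second in the future,
+		 * so fq must hold the packets back and its small limit is
+		 * quickly exceeded.)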
+ */ + sendmsg(s, &msg, MSG_NOSIGNAL); + snd_target--; + } + + /* no kernel crash so far is considered success */ + err = 0; + +out: + if (s >= 0) + close(s); + + return err; +} + +static int setup(const char *tun_dev) +{ + int target_index = -1; + int tap_fd = -1; + + tap_fd = open_tuntap(tun_dev, false); + if (!ASSERT_GE(tap_fd, 0, "open_tun")) + return -1; + + target_index = if_nametoindex(tun_dev); + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) + return -1; + + SYS(fail, "ip link add link_err type dummy"); + SYS(fail, "ip link set lo up"); + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); + SYS(fail, "ip link set link_err up"); + SYS(fail, "ip link set %s up", tun_dev); + + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", + TEST_CIDR, BPF_OBJECT); + + SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", + target_index); + SYS(fail, "ip route add t 100 default dev %s", tun_dev); + + return tap_fd; + +fail: + if (tap_fd >= 0) + close(tap_fd); + return -1; +} + +static void test_lwt_reroute_normal_xmit(void) +{ + const char *tun_dev = "tun0"; + int tun_fd = -1; + int ifindex = -1; + char ip[256]; + struct timeval timeo = { + .tv_sec = 0, + .tv_usec = 250000, + }; + + tun_fd = setup(tun_dev); + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) + return; + + ifindex = if_nametoindex(tun_dev); + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) + return; + + snprintf(ip, 256, "10.0.0.%d", ifindex); + + /* ping packets should be received by the tun device */ + ping_once(ip); + + if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, + "wait_for_packet")) + log_err("%s xmit", __func__); +} + +/* + * Test the failure case when the skb is dropped at the qdisc. This is a + * regression prevention at the xmit hook only. + */ +static void test_lwt_reroute_qdisc_dropped(void) +{ + const char *tun_dev = "tun0"; + int tun_fd = -1; + int ifindex = -1; + char ip[256]; + + tun_fd = setup(tun_dev); + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) + goto fail; + + SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); + + ifindex = if_nametoindex(tun_dev); + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) + return; + + snprintf(ip, 256, "10.0.0.%d", ifindex); + ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); + +fail: + if (tun_fd >= 0) + close(tun_fd); +} + +static void *test_lwt_reroute_run(void *arg) +{ + netns_delete(); + RUN_TEST(lwt_reroute_normal_xmit); + RUN_TEST(lwt_reroute_qdisc_dropped); + return NULL; +} + +void test_lwt_reroute(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. 
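	 * Each RUN_TEST() in the runner above also creates the test netns,
	 * enters it via open_netns(), runs the subtest, and then deletes the
	 * netns again.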
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c index 7423983472c7..d6bd5e16e637 100644 --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -9,12 +9,38 @@ void test_refcounted_kptr(void) { + RUN_TESTS(refcounted_kptr); } void test_refcounted_kptr_fail(void) { + RUN_TESTS(refcounted_kptr_fail); } void test_refcounted_kptr_wrong_owner(void) { + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct refcounted_kptr *skel; + int ret; + + skel = refcounted_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts); + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1"); + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts); + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b"); + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts); + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2"); + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); + refcounted_kptr__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index 740d5f644b40..d4579f735398 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -79,6 +79,8 @@ static const char * const success_tests[] = { "test_task_from_pid_current", "test_task_from_pid_invalid", "task_kfunc_acquire_trusted_walked", + "test_task_kfunc_flavor_relo", + "test_task_kfunc_flavor_relo_not_found", }; void test_task_kfunc(void) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c index e873766276d1..48b55539331e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c @@ -3,6 +3,7 @@ #include <test_progs.h> #include <linux/pkt_cls.h> +#include "cap_helpers.h" #include "test_tc_bpf.skel.h" #define LO_IFINDEX 1 @@ -327,7 +328,7 @@ static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd) return 0; } -void test_tc_bpf(void) +void tc_bpf_root(void) { DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, .attach_point = BPF_TC_INGRESS); @@ -393,3 +394,36 @@ end: } test_tc_bpf__destroy(skel); } + +void tc_bpf_non_root(void) +{ + struct test_tc_bpf *skel = NULL; + __u64 caps = 0; + int ret; + + /* In case CAP_BPF and CAP_PERFMON is not set */ + ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); + if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) + return; + ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); + if (!ASSERT_OK(ret, "disable_cap_sys_admin")) + goto restore_cap; + + skel = test_tc_bpf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load")) + goto 
restore_cap;
+
+	test_tc_bpf__destroy(skel);
+
+restore_cap:
+	if (caps)
+		cap_enable_effective(caps, NULL);
+}
+
+void test_tc_bpf(void)
+{
+	if (test__start_subtest("tc_bpf_root"))
+		tc_bpf_root();
+	if (test__start_subtest("tc_bpf_non_root"))
+		tc_bpf_non_root();
+} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c new file mode 100644 index 000000000000..cd051d3901a9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <unistd.h>
+#include <test_progs.h>
+#include "uprobe_multi.skel.h"
+#include "uprobe_multi_bench.skel.h"
+#include "uprobe_multi_usdt.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "testing_helpers.h"
+
+static char test_data[] = "test_data";
+
+noinline void uprobe_multi_func_1(void)
+{
+	asm volatile ("");
+}
+
+noinline void uprobe_multi_func_2(void)
+{
+	asm volatile ("");
+}
+
+noinline void uprobe_multi_func_3(void)
+{
+	asm volatile ("");
+}
+
+struct child {
+	int go[2];
+	int pid;
+};
+
+static void release_child(struct child *child)
+{
+	int child_status;
+
+	if (!child)
+		return;
+	close(child->go[1]);
+	close(child->go[0]);
+	if (child->pid > 0)
+		waitpid(child->pid, &child_status, 0);
+}
+
+static void kick_child(struct child *child)
+{
+	char c = 1;
+
+	if (child) {
+		write(child->go[1], &c, 1);
+		release_child(child);
+	}
+	fflush(NULL);
+}
+
+static struct child *spawn_child(void)
+{
+	static struct child child;
+	int err;
+	int c;
+
+	/* pipe to notify child to execute the trigger functions */
+	if (pipe(child.go))
+		return NULL;
+
+	child.pid = fork();
+	if (child.pid < 0) {
+		release_child(&child);
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* child */
+	if (child.pid == 0) {
+		close(child.go[1]);
+
+		/* wait for parent's kick */
+		err = read(child.go[0], &c, 1);
+		if (err != 1)
+			exit(err);
+
+		uprobe_multi_func_1();
+		uprobe_multi_func_2();
+		uprobe_multi_func_3();
+
+		exit(errno);
+	}
+
+	return &child;
+}
+
+static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
+{
+	skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
+	skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2;
+	skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3;
+
+	skel->bss->user_ptr = test_data;
+
+	/*
+	 * Disable the pid check in the bpf program for the pid filter test,
+	 * because there the probes should be executed only by child->pid,
+	 * which was passed at probe attach time.
+	 */
+	skel->bss->pid = child ? 0 : getpid();
+
+	if (child)
+		kick_child(child);
+
+	/* trigger all probes */
+	uprobe_multi_func_1();
+	uprobe_multi_func_2();
+	uprobe_multi_func_3();
+
+	/*
+	 * There are 2 entry and 2 exit probes called for each
+	 * uprobe_multi_func_[123] function, and each sleepable probe
+	 * invocation (6 in total) increments uprobe_multi_sleep_result.
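	 *
	 * Concretely: uprobe and uprobe_sleep each hit all 3 functions
	 * (2 entry hits per function), uretprobe and uretprobe_sleep
	 * likewise (2 exit hits per function), and the two sleepable
	 * programs fire 2 * 3 = 6 times in total.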
+ */ + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 2, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 2, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 2, "uprobe_multi_func_3_result"); + + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 2, "uretprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 2, "uretprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 2, "uretprobe_multi_func_3_result"); + + ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result"); + + if (child) + ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid"); +} + +static void test_skel_api(void) +{ + struct uprobe_multi *skel = NULL; + int err; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + err = uprobe_multi__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi__attach")) + goto cleanup; + + uprobe_multi_test_run(skel, NULL); + +cleanup: + uprobe_multi__destroy(skel); +} + +static void +__test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts, + struct child *child) +{ + pid_t pid = child ? child->pid : -1; + struct uprobe_multi *skel = NULL; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + opts->retprobe = false; + skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, pid, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = true; + skel->links.uretprobe = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, pid, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uretprobe, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = false; + skel->links.uprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uprobe_sleep, pid, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uprobe_sleep, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = true; + skel->links.uretprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uretprobe_sleep, + pid, binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uretprobe_sleep, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = false; + skel->links.uprobe_extra = bpf_program__attach_uprobe_multi(skel->progs.uprobe_extra, -1, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + uprobe_multi_test_run(skel, child); + +cleanup: + uprobe_multi__destroy(skel); +} + +static void +test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts) +{ + struct child *child; + + /* no pid filter */ + __test_attach_api(binary, pattern, opts, NULL); + + /* pid filter */ + child = spawn_child(); + if (!ASSERT_OK_PTR(child, "spawn_child")) + return; + + __test_attach_api(binary, pattern, opts, child); +} + +static void test_attach_api_pattern(void) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + + test_attach_api("/proc/self/exe", "uprobe_multi_func_*", &opts); + test_attach_api("/proc/self/exe", "uprobe_multi_func_?", &opts); +} + +static void test_attach_api_syms(void) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + const char *syms[3] = { + "uprobe_multi_func_1", + "uprobe_multi_func_2", + "uprobe_multi_func_3", + }; + + opts.syms = syms; + opts.cnt = 
ARRAY_SIZE(syms);
+	test_attach_api("/proc/self/exe", NULL, &opts);
+}
+
+static void __test_link_api(struct child *child)
+{
+	int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1;
+	LIBBPF_OPTS(bpf_link_create_opts, opts);
+	const char *path = "/proc/self/exe";
+	struct uprobe_multi *skel = NULL;
+	unsigned long *offsets = NULL;
+	const char *syms[3] = {
+		"uprobe_multi_func_1",
+		"uprobe_multi_func_2",
+		"uprobe_multi_func_3",
+	};
+	int link_extra_fd = -1;
+	int err;
+
+	err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets);
+	if (!ASSERT_OK(err, "elf_resolve_syms_offsets"))
+		return;
+
+	opts.uprobe_multi.path = path;
+	opts.uprobe_multi.offsets = offsets;
+	opts.uprobe_multi.cnt = ARRAY_SIZE(syms);
+	opts.uprobe_multi.pid = child ? child->pid : 0;
+
+	skel = uprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+		goto cleanup;
+
+	opts.uprobe_multi.flags = 0;
+	prog_fd = bpf_program__fd(skel->progs.uprobe);
+	link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+		goto cleanup;
+
+	opts.uprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN;
+	prog_fd = bpf_program__fd(skel->progs.uretprobe);
+	link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+		goto cleanup;
+
+	opts.uprobe_multi.flags = 0;
+	prog_fd = bpf_program__fd(skel->progs.uprobe_sleep);
+	link3_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link3_fd, 0, "link3_fd"))
+		goto cleanup;
+
+	opts.uprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN;
+	prog_fd = bpf_program__fd(skel->progs.uretprobe_sleep);
+	link4_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link4_fd, 0, "link4_fd"))
+		goto cleanup;
+
+	opts.uprobe_multi.flags = 0;
+	opts.uprobe_multi.pid = 0;
+	prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+	link_extra_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+	if (!ASSERT_GE(link_extra_fd, 0, "link_extra_fd"))
+		goto cleanup;
+
+	uprobe_multi_test_run(skel, child);
+
+cleanup:
+	if (link1_fd >= 0)
+		close(link1_fd);
+	if (link2_fd >= 0)
+		close(link2_fd);
+	if (link3_fd >= 0)
+		close(link3_fd);
+	if (link4_fd >= 0)
+		close(link4_fd);
+	if (link_extra_fd >= 0)
+		close(link_extra_fd);
+
+	uprobe_multi__destroy(skel);
+	free(offsets);
+}
+
+void test_link_api(void)
+{
+	struct child *child;
+
+	/* no pid filter */
+	__test_link_api(NULL);
+
+	/* pid filter */
+	child = spawn_child();
+	if (!ASSERT_OK_PTR(child, "spawn_child"))
+		return;
+
+	__test_link_api(child);
+}
+
+static void test_bench_attach_uprobe(void)
+{
+	long attach_start_ns = 0, attach_end_ns = 0;
+	struct uprobe_multi_bench *skel = NULL;
+	long detach_start_ns, detach_end_ns;
+	double attach_delta, detach_delta;
+	int err;
+
+	skel = uprobe_multi_bench__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "uprobe_multi_bench__open_and_load"))
+		goto cleanup;
+
+	attach_start_ns = get_time_ns();
+
+	err = uprobe_multi_bench__attach(skel);
+	if (!ASSERT_OK(err, "uprobe_multi_bench__attach"))
+		goto cleanup;
+
+	attach_end_ns = get_time_ns();
+
+	system("./uprobe_multi bench");
+
+	ASSERT_EQ(skel->bss->count, 50000, "uprobes_count");
+
+cleanup:
+	detach_start_ns = get_time_ns();
+	uprobe_multi_bench__destroy(skel);
+	detach_end_ns = get_time_ns();
+
+	attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+	detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+	
printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); +} + +static void test_bench_attach_usdt(void) +{ + long attach_start_ns = 0, attach_end_ns = 0; + struct uprobe_multi_usdt *skel = NULL; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + + skel = uprobe_multi_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open")) + goto cleanup; + + attach_start_ns = get_time_ns(); + + skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, -1, "./uprobe_multi", + "test", "usdt", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt0, "bpf_program__attach_usdt")) + goto cleanup; + + attach_end_ns = get_time_ns(); + + system("./uprobe_multi usdt"); + + ASSERT_EQ(skel->bss->count, 50000, "usdt_count"); + +cleanup: + detach_start_ns = get_time_ns(); + uprobe_multi_usdt__destroy(skel); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); +} + +void test_uprobe_multi_test(void) +{ + if (test__start_subtest("skel_api")) + test_skel_api(); + if (test__start_subtest("attach_api_pattern")) + test_attach_api_pattern(); + if (test__start_subtest("attach_api_syms")) + test_attach_api_syms(); + if (test__start_subtest("link_api")) + test_link_api(); + if (test__start_subtest("bench_uprobe")) + test_bench_attach_uprobe(); + if (test__start_subtest("bench_usdt")) + test_bench_attach_usdt(); +} diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 06838083079c..b567a666d2b8 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,10 +14,16 @@ struct node_data { struct bpf_rb_node node; }; +struct plain_local { + long key; + long data; +}; + struct map_value { struct prog_test_ref_kfunc *not_kptr; struct prog_test_ref_kfunc __kptr *val; struct node_data __kptr *node; + struct plain_local __kptr *plain; }; /* This is necessary so that LLVM generates BTF for node_data struct @@ -67,6 +73,28 @@ long stash_rb_nodes(void *ctx) } SEC("tc") +long stash_plain(void *ctx) +{ + struct map_value *mapval; + struct plain_local *res; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + res->key = 41; + + res = bpf_kptr_xchg(&mapval->plain, res); + if (res) + bpf_obj_drop(res); + return 0; +} + +SEC("tc") long unstash_rb_node(void *ctx) { struct map_value *mapval; diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c new file mode 100644 index 000000000000..fcf7a7567da2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "../bpf_experimental.h" +#include "bpf_misc.h" + +struct node_data { + long key; + long data; + struct bpf_rb_node node; +}; + +struct map_value { + struct node_data __kptr *node; +}; + +struct node_data2 { + long key[4]; +}; + +/* This is necessary so that LLVM generates BTF for node_data struct + * If it's not included, a fwd reference for node_data will be generated but + * no struct. Example BTF of "node" field in map_value when not included: + * + * [10] PTR '(anon)' type_id=35 + * [34] FWD 'node_data' fwd_kind=struct + * [35] TYPE_TAG 'kptr_ref' type_id=34 + */ +struct node_data *just_here_because_btf_bug; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 2); +} some_nodes SEC(".maps"); + +SEC("tc") +__failure __msg("invalid kptr access, R2 type=ptr_node_data2 expected=ptr_node_data") +long stash_rb_nodes(void *ctx) +{ + struct map_value *mapval; + struct node_data2 *res; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + res->key[0] = 40; + + res = bpf_kptr_xchg(&mapval->node, res); + if (res) + bpf_obj_drop(res); + return 0; +} + +SEC("tc") +__failure __msg("R1 must have zero offset when passed to release func") +long drop_rb_node_off(void *ctx) +{ + struct map_value *mapval; + struct node_data *res; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + /* Try releasing with graph node offset */ + bpf_obj_drop(&res->node); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index c55652fdc63a..893a4fdb4b6e 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -8,6 +8,9 @@ #include "bpf_misc.h" #include "bpf_experimental.h" +extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; + struct node_data { long key; long list_data; @@ -497,4 +500,72 @@ long rbtree_wrong_owner_remove_fail_a2(void *ctx) return 0; } +SEC("?fentry.s/bpf_testmod_test_read") +__success +int BPF_PROG(rbtree_sleepable_rcu, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + struct bpf_rb_node *rb; + struct node_data *n, *m = NULL; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 0; + + bpf_rcu_read_lock(); + bpf_spin_lock(&lock); + bpf_rbtree_add(&root, &n->r, less); + rb = bpf_rbtree_first(&root); + if (!rb) + goto err_out; + + rb = bpf_rbtree_remove(&root, rb); + if (!rb) + goto err_out; + + m = container_of(rb, struct node_data, r); + +err_out: + bpf_spin_unlock(&lock); + bpf_rcu_read_unlock(); + if (m) + bpf_obj_drop(m); + return 0; +} + +SEC("?fentry.s/bpf_testmod_test_read") +__success +int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + struct bpf_rb_node *rb; + struct node_data *n, *m = NULL; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 0; + + /* No explicit bpf_rcu_read_lock */ + bpf_spin_lock(&lock); + bpf_rbtree_add(&root, &n->r, less); + rb = bpf_rbtree_first(&root); + if 
(!rb) + goto err_out; + + rb = bpf_rbtree_remove(&root, rb); + if (!rb) + goto err_out; + + m = container_of(rb, struct node_data, r); + +err_out: + bpf_spin_unlock(&lock); + /* No explicit bpf_rcu_read_unlock */ + if (m) + bpf_obj_drop(m); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 0b09e5c915b1..1ef07f6ee580 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -13,6 +13,9 @@ struct node_acquire { struct bpf_refcount refcount; }; +extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; + #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_acquire, node); @@ -71,4 +74,29 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) return 0; } +SEC("?fentry.s/bpf_testmod_test_read") +__failure __msg("function calls are not allowed while holding a lock") +int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + struct node_acquire *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 0; + + /* spin_{lock,unlock} are in different RCU CS */ + bpf_rcu_read_lock(); + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + bpf_spin_unlock(&glock); + bpf_rcu_read_unlock(); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index b09371bba204..70df695312dc 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -18,6 +18,13 @@ int err, pid; */ struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; + +struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak; +/* The two-param bpf_task_acquire doesn't exist */ +struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak; +/* Incorrect type for first param */ +struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak; + void invalid_kfunc(void) __ksym __weak; void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; @@ -56,6 +63,50 @@ static int test_acquire_release(struct task_struct *task) } SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired = NULL; + int fake_ctx = 42; + + if (bpf_ksym_exists(bpf_task_acquire___one)) { + acquired = bpf_task_acquire___one(task); + } else if (bpf_ksym_exists(bpf_task_acquire___two)) { + /* Here, bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id + * call will find vmlinux's bpf_task_acquire, but subsequent + * bpf_core_types_are_compat will fail + */ + acquired = bpf_task_acquire___two(task, &fake_ctx); + err = 3; + return 0; + } else if (bpf_ksym_exists(bpf_task_acquire___three)) { + /* bpf_core_types_are_compat will fail similarly to above case */ + acquired = bpf_task_acquire___three(&fake_ctx); + err = 4; + return 0; + } + + if (acquired) + bpf_task_release(acquired); + else + err = 5; + return 0; +} + +SEC("tp_btf/task_newtask") +int 
BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags) +{ + /* Neither symbol should successfully resolve. + * Success or failure of one ___flavor should not affect others + */ + if (bpf_ksym_exists(bpf_task_acquire___two)) + err = 1; + else if (bpf_ksym_exists(bpf_task_acquire___three)) + err = 2; + + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags) { if (!is_test_kfunc_task()) diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c index 321abf862801..67c14ba1e87b 100644 --- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -5,7 +5,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 const volatile int skip = 0; #else const volatile int skip = 1; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c new file mode 100644 index 000000000000..8c895122f293 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include <linux/ip.h> +#include "bpf_tracing_net.h" + +/* We don't care about whether the packet can be received by network stack. + * Just care if the packet is sent to the correct device at correct direction + * and not panic the kernel. + */ +static int prepend_dummy_mac(struct __sk_buff *skb) +{ + char mac[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xf, + 0xe, 0xd, 0xc, 0xb, 0xa, 0x08, 0x00}; + + if (bpf_skb_change_head(skb, ETH_HLEN, 0)) + return -1; + + if (bpf_skb_store_bytes(skb, 0, mac, sizeof(mac), 0)) + return -1; + + return 0; +} + +/* Use the last byte of IP address to redirect the packet */ +static int get_redirect_target(struct __sk_buff *skb) +{ + struct iphdr *iph = NULL; + void *start = (void *)(long)skb->data; + void *end = (void *)(long)skb->data_end; + + if (start + sizeof(*iph) > end) + return -1; + + iph = (struct iphdr *)start; + return bpf_ntohl(iph->daddr) & 0xff; +} + +SEC("redir_ingress") +int test_lwt_redirect_in(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + if (prepend_dummy_mac(skb)) + return BPF_DROP; + + return bpf_redirect(target, BPF_F_INGRESS); +} + +SEC("redir_egress") +int test_lwt_redirect_out(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + if (prepend_dummy_mac(skb)) + return BPF_DROP; + + return bpf_redirect(target, 0); +} + +SEC("redir_egress_nomac") +int test_lwt_redirect_out_nomac(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + return bpf_redirect(target, 0); +} + +SEC("redir_ingress_nomac") +int test_lwt_redirect_in_nomac(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + return bpf_redirect(target, BPF_F_INGRESS); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_reroute.c b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c new file mode 100644 index 000000000000..1dc64351929c --- 
/dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <linux/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/ip.h> + +/* This function extracts the last byte of the daddr, and uses it + * as output dev index. + */ +SEC("lwt_xmit") +int test_lwt_reroute(struct __sk_buff *skb) +{ + struct iphdr *iph = NULL; + void *start = (void *)(long)skb->data; + void *end = (void *)(long)skb->data_end; + + /* set mark at most once */ + if (skb->mark != 0) + return BPF_OK; + + if (start + sizeof(*iph) > end) + return BPF_DROP; + + iph = (struct iphdr *)start; + skb->mark = bpf_ntohl(iph->daddr) & 0xff; + + /* do not reroute x.x.x.0 packets */ + if (skb->mark == 0) + return BPF_OK; + + return BPF_LWT_REROUTE; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c index d28ca8d1f3d0..ef7da419632a 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c +++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c @@ -2,6 +2,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/ip.h> /* Dummy prog to test TC-BPF API */ @@ -10,3 +12,14 @@ int cls(struct __sk_buff *skb) { return 0; } + +/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */ +SEC("tcx/ingress") +int pkt_ptr(struct __sk_buff *skb) +{ + struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr); + + if ((long)(iph + 1) > (long)skb->data_end) + return 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c new file mode 100644 index 000000000000..419d9aa28fce --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> + +char _license[] SEC("license") = "GPL"; + +__u64 uprobe_multi_func_1_addr = 0; +__u64 uprobe_multi_func_2_addr = 0; +__u64 uprobe_multi_func_3_addr = 0; + +__u64 uprobe_multi_func_1_result = 0; +__u64 uprobe_multi_func_2_result = 0; +__u64 uprobe_multi_func_3_result = 0; + +__u64 uretprobe_multi_func_1_result = 0; +__u64 uretprobe_multi_func_2_result = 0; +__u64 uretprobe_multi_func_3_result = 0; + +__u64 uprobe_multi_sleep_result = 0; + +int pid = 0; +int child_pid = 0; + +bool test_cookie = false; +void *user_ptr = 0; + +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep) +{ + child_pid = bpf_get_current_pid_tgid() >> 32; + + if (pid && child_pid != pid) + return; + + __u64 cookie = test_cookie ? 
bpf_get_attach_cookie(ctx) : 0; + __u64 addr = bpf_get_func_ip(ctx); + +#define SET(__var, __addr, __cookie) ({ \ + if (addr == __addr && \ + (!test_cookie || (cookie == __cookie))) \ + __var += 1; \ +}) + + if (is_return) { + SET(uretprobe_multi_func_1_result, uprobe_multi_func_1_addr, 2); + SET(uretprobe_multi_func_2_result, uprobe_multi_func_2_addr, 3); + SET(uretprobe_multi_func_3_result, uprobe_multi_func_3_addr, 1); + } else { + SET(uprobe_multi_func_1_result, uprobe_multi_func_1_addr, 3); + SET(uprobe_multi_func_2_result, uprobe_multi_func_2_addr, 1); + SET(uprobe_multi_func_3_result, uprobe_multi_func_3_addr, 2); + } + +#undef SET + + if (is_sleep && verify_sleepable_user_copy()) + uprobe_multi_sleep_result += 1; +} + +SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*") +int uprobe(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, false, false); + return 0; +} + +SEC("uretprobe.multi//proc/self/exe:uprobe_multi_func_*") +int uretprobe(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, true, false); + return 0; +} + +SEC("uprobe.multi.s//proc/self/exe:uprobe_multi_func_*") +int uprobe_sleep(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, false, true); + return 0; +} + +SEC("uretprobe.multi.s//proc/self/exe:uprobe_multi_func_*") +int uretprobe_sleep(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, true, true); + return 0; +} + +SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*") +int uprobe_extra(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c new file mode 100644 index 000000000000..5367f6105e30 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int count; + +SEC("uprobe.multi/./uprobe_multi:uprobe_multi_func_*") +int uprobe_bench(struct pt_regs *ctx) +{ + count++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c new file mode 100644 index 000000000000..9e1c33d0bd2f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +char _license[] SEC("license") = "GPL"; + +int count; + +SEC("usdt") +int usdt0(struct pt_regs *ctx) +{ + count++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 724bb38988b5..8893094725f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -4,7 +4,8 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 SEC("socket") __description("BSWAP, 16") diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index ce48f7757db2..2dae5322a18e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -4,7 +4,8 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 +#if 
(defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 SEC("socket") __description("gotol, small_imm") diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 3c3d1bddd67f..0c638f45aaf1 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -4,7 +4,8 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 SEC("socket") __description("LDSX, S8") diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index be6f69a6b659..3c8ac2c57b1b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -4,7 +4,8 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 SEC("socket") __description("MOV32SX, S8") diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index f61a9a1058c8..0990f8825675 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -4,7 +4,8 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 SEC("socket") __description("SDIV32, non-zero imm divisor, check 1") diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 5312323881b6..5b7a55136741 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -7,6 +7,7 @@ #include <stdbool.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include <time.h> int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); @@ -33,4 +34,13 @@ int load_bpf_testmod(bool verbose); int unload_bpf_testmod(bool verbose); int kern_sync_rcu(void); +static inline __u64 get_time_ns(void) +{ + struct timespec t; + + clock_gettime(CLOCK_MONOTONIC, &t); + + return (u64)t.tv_sec * 1000000000 + t.tv_nsec; +} + #endif /* __TESTING_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c new file mode 100644 index 000000000000..a61ceab60b68 --- /dev/null +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sdt.h> + +#define __PASTE(a, b) a##b +#define PASTE(a, b) __PASTE(a, b) + +#define NAME(name, idx) PASTE(name, idx) + +#define DEF(name, idx) int NAME(name, idx)(void) { return 0; } +#define CALL(name, idx) NAME(name, idx)(); + +#define F(body, name, idx) body(name, idx) + +#define F10(body, name, idx) \ + F(body, PASTE(name, idx), 0) F(body, PASTE(name, idx), 1) F(body, PASTE(name, idx), 2) \ + F(body, PASTE(name, idx), 3) F(body, PASTE(name, 
idx), 4) F(body, PASTE(name, idx), 5) \
+	F(body, PASTE(name, idx), 6) F(body, PASTE(name, idx), 7) F(body, PASTE(name, idx), 8) \
+	F(body, PASTE(name, idx), 9)
+
+#define F100(body, name, idx) \
+	F10(body, PASTE(name, idx), 0) F10(body, PASTE(name, idx), 1) F10(body, PASTE(name, idx), 2) \
+	F10(body, PASTE(name, idx), 3) F10(body, PASTE(name, idx), 4) F10(body, PASTE(name, idx), 5) \
+	F10(body, PASTE(name, idx), 6) F10(body, PASTE(name, idx), 7) F10(body, PASTE(name, idx), 8) \
+	F10(body, PASTE(name, idx), 9)
+
+#define F1000(body, name, idx) \
+	F100(body, PASTE(name, idx), 0) F100(body, PASTE(name, idx), 1) F100(body, PASTE(name, idx), 2) \
+	F100(body, PASTE(name, idx), 3) F100(body, PASTE(name, idx), 4) F100(body, PASTE(name, idx), 5) \
+	F100(body, PASTE(name, idx), 6) F100(body, PASTE(name, idx), 7) F100(body, PASTE(name, idx), 8) \
+	F100(body, PASTE(name, idx), 9)
+
+#define F10000(body, name, idx) \
+	F1000(body, PASTE(name, idx), 0) F1000(body, PASTE(name, idx), 1) F1000(body, PASTE(name, idx), 2) \
+	F1000(body, PASTE(name, idx), 3) F1000(body, PASTE(name, idx), 4) F1000(body, PASTE(name, idx), 5) \
+	F1000(body, PASTE(name, idx), 6) F1000(body, PASTE(name, idx), 7) F1000(body, PASTE(name, idx), 8) \
+	F1000(body, PASTE(name, idx), 9)
+
+F10000(DEF, uprobe_multi_func_, 0)
+F10000(DEF, uprobe_multi_func_, 1)
+F10000(DEF, uprobe_multi_func_, 2)
+F10000(DEF, uprobe_multi_func_, 3)
+F10000(DEF, uprobe_multi_func_, 4)
+
+static int bench(void)
+{
+	F10000(CALL, uprobe_multi_func_, 0)
+	F10000(CALL, uprobe_multi_func_, 1)
+	F10000(CALL, uprobe_multi_func_, 2)
+	F10000(CALL, uprobe_multi_func_, 3)
+	F10000(CALL, uprobe_multi_func_, 4)
+	return 0;
+}
+
+#define PROBE STAP_PROBE(test, usdt);
+
+#define PROBE10 PROBE PROBE PROBE PROBE PROBE \
+		PROBE PROBE PROBE PROBE PROBE
+#define PROBE100 PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 \
+		PROBE10 PROBE10 PROBE10 PROBE10 PROBE10
+#define PROBE1000 PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 \
+		PROBE100 PROBE100 PROBE100 PROBE100 PROBE100
+#define PROBE10000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 \
+		PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000
+
+static int usdt(void)
+{
+	PROBE10000
+	PROBE10000
+	PROBE10000
+	PROBE10000
+	PROBE10000
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	if (argc != 2)
+		goto error;
+
+	if (!strcmp("bench", argv[1]))
+		return bench();
+	if (!strcmp("usdt", argv[1]))
+		return usdt();
+
+error:
+	fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);
+	return -1;
+}