diff options
Diffstat (limited to 'tools')
224 files changed, 9837 insertions, 748 deletions
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 5535650800ab..f897eeeb0b4f 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -73,7 +73,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm -all: $(PROGS) bpftool +all: $(PROGS) bpftool runqslower $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm' $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o @@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean +clean: bpftool_clean runqslower_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* @@ -97,7 +97,7 @@ clean: bpftool_clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf $(Q)$(RM) -r -- $(OUTPUT)feature -install: $(PROGS) bpftool_install +install: $(PROGS) bpftool_install runqslower_install $(call QUIET_INSTALL, bpf_jit_disasm) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm @@ -115,4 +115,14 @@ bpftool_install: bpftool_clean: $(call descend,bpftool,clean) -.PHONY: all install clean bpftool bpftool_install bpftool_clean +runqslower: + $(call descend,runqslower) + +runqslower_install: + $(call descend,runqslower,install) + +runqslower_clean: + $(call descend,runqslower,clean) + +.PHONY: all install clean bpftool bpftool_install bpftool_clean \ + runqslower runqslower_install runqslower_clean diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 86a87da97d0b..94d91322895a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -196,7 +196,7 @@ and global variables. #define __EXAMPLE_SKEL_H__ #include <stdlib.h> - #include <libbpf.h> + #include <bpf/libbpf.h> struct example { struct bpf_object_skeleton *skeleton; diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 39bc6f0f4f0b..c4e810335810 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -45,7 +45,7 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ - -I$(srctree)/tools/lib/bpf \ + -I$(srctree)/tools/lib \ -I$(srctree)/tools/perf CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' ifneq ($(EXTRA_CFLAGS),) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index e5bc97b71ceb..4ba90d81b6a1 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -8,15 +8,15 @@ #include <stdio.h> #include <string.h> #include <unistd.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> #include <linux/btf.h> #include <linux/hashtable.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include "btf.h" #include "json_writer.h" #include "main.h" @@ -370,6 +370,10 @@ static int dump_btf_c(const struct btf *btf, if (IS_ERR(d)) return PTR_ERR(d); + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); + printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); + printf("#endif\n\n"); + if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { err = btf_dump__dump_type(d, root_type_ids[i]); @@ -386,6 +390,10 @@ static int dump_btf_c(const struct btf *btf, } } + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); + printf("#pragma clang attribute pop\n"); + printf("#endif\n"); + done: btf_dump__free(d); return err; @@ -524,7 +532,7 @@ static int do_dump(int argc, char **argv) if (IS_ERR(btf)) { err = PTR_ERR(btf); btf = NULL; - p_err("failed to load BTF from %s: %s", + p_err("failed to load BTF from %s: %s", *argv, strerror(err)); goto done; } diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index d66131f69689..01cc52b834fa 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -8,8 +8,8 @@ #include <linux/bitops.h> #include <linux/btf.h> #include <linux/err.h> +#include <bpf/btf.h> -#include "btf.h" #include "json_writer.h" #include "main.h" @@ -26,7 +26,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw, bool is_plain_text) { if (is_plain_text) - jsonw_printf(jw, "%p", data); + jsonw_printf(jw, "%p", *(void **)data); else jsonw_printf(jw, "%lu", *(unsigned long *)data); } diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 2f017caa678d..62c6a1d7cd18 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -14,7 +14,7 @@ #include <sys/types.h> #include <unistd.h> -#include <bpf.h> +#include <bpf/bpf.h> #include "main.h" diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 88264abaa738..b75b8ec5469c 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -20,8 +20,8 @@ #include <sys/stat.h> #include <sys/vfs.h> -#include <bpf.h> -#include <libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/bpf.h> +#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ #include "main.h" diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 03bdc5b3ac49..446ba891f1e2 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -12,8 +12,8 @@ #include <linux/filter.h> #include <linux/limits.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include <zlib.h> #include "main.h" @@ -572,6 +572,18 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, printf("\n"); } +static void +probe_large_insn_limit(const char *define_prefix, __u32 ifindex) +{ + bool res; + + res = bpf_probe_large_insn_limit(ifindex); + print_bool_feature("have_large_insn_limit", + "Large program size limit", + "HAVE_LARGE_INSN_LIMIT", + res, define_prefix); +} + static int do_probe(int argc, char **argv) { enum probe_component target = COMPONENT_UNSPEC; @@ -724,6 +736,12 @@ static int do_probe(int argc, char **argv) probe_helpers_for_progtype(i, supported_types[i], define_prefix, ifindex); + print_end_then_start_section("misc", + "Scanning miscellaneous eBPF features...", + "/*** eBPF misc features ***/", + define_prefix); + probe_large_insn_limit(define_prefix, ifindex); + exit_close_json: if (json_output) { /* End current "section" of probes */ diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 7ce09a9a6999..f8113b3646f5 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -12,15 +12,15 @@ #include <stdio.h> #include <string.h> #include <unistd.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/mman.h> #include <unistd.h> +#include <bpf/btf.h> -#include "btf.h" -#include "libbpf_internal.h" +#include "bpf/libbpf_internal.h" #include "json_writer.h" #include "main.h" @@ -333,7 +333,7 @@ static int do_skeleton(int argc, char **argv) #define %2$s \n\ \n\ #include <stdlib.h> \n\ - #include <libbpf.h> \n\ + #include <bpf/libbpf.h> \n\ \n\ struct %1$s { \n\ struct bpf_object_skeleton *skeleton; \n\ diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index bfed711258ce..f7f5885aa3ba 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -24,7 +24,7 @@ #include <dis-asm.h> #include <sys/stat.h> #include <limits.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include "json_writer.h" #include "main.h" diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 1fe91c558508..6d41bbfc6459 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -9,8 +9,8 @@ #include <stdlib.h> #include <string.h> -#include <bpf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include "main.h" diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c01f76fa6876..e6c85680b34d 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -15,9 +15,9 @@ #include <sys/types.h> #include <sys/stat.h> -#include <bpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> -#include "btf.h" #include "json_writer.h" #include "main.h" @@ -48,6 +48,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_QUEUE] = "queue", [BPF_MAP_TYPE_STACK] = "stack", [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", + [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -251,6 +252,7 @@ static int do_dump_btf(const struct btf_dumper *d, struct bpf_map_info *map_info, void *key, void *value) { + __u32 value_id; int ret; /* start of key-value pair */ @@ -264,9 +266,12 @@ static int do_dump_btf(const struct btf_dumper *d, goto err_end_obj; } + value_id = map_info->btf_vmlinux_value_type_id ? + : map_info->btf_value_type_id; + if (!map_is_per_cpu(map_info->type)) { jsonw_name(d->jw, "value"); - ret = btf_dumper_type(d, map_info->btf_value_type_id, value); + ret = btf_dumper_type(d, value_id, value); } else { unsigned int i, n, step; @@ -278,8 +283,7 @@ static int do_dump_btf(const struct btf_dumper *d, jsonw_start_object(d->jw); jsonw_int_field(d->jw, "cpu", i); jsonw_name(d->jw, "value"); - ret = btf_dumper_type(d, map_info->btf_value_type_id, - value + i * step); + ret = btf_dumper_type(d, value_id, value + i * step); jsonw_end_object(d->jw); if (ret) break; @@ -915,37 +919,63 @@ static int maps_have_btf(int *fds, int nb_fds) { struct bpf_map_info info = {}; __u32 len = sizeof(info); - struct btf *btf = NULL; int err, i; for (i = 0; i < nb_fds; i++) { err = bpf_obj_get_info_by_fd(fds[i], &info, &len); if (err) { p_err("can't get map info: %s", strerror(errno)); - goto err_close; - } - - err = btf__get_from_id(info.btf_id, &btf); - if (err) { - p_err("failed to get btf"); - goto err_close; + return -1; } - if (!btf) + if (!info.btf_id) return 0; } return 1; +} -err_close: - for (; i < nb_fds; i++) - close(fds[i]); - return -1; +static struct btf *btf_vmlinux; + +static struct btf *get_map_kv_btf(const struct bpf_map_info *info) +{ + struct btf *btf = NULL; + + if (info->btf_vmlinux_value_type_id) { + if (!btf_vmlinux) { + btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(btf_vmlinux)) + p_err("failed to get kernel btf"); + } + return btf_vmlinux; + } else if (info->btf_value_type_id) { + int err; + + err = btf__get_from_id(info->btf_id, &btf); + if (err || !btf) { + p_err("failed to get btf"); + btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH); + } + } + + return btf; +} + +static void free_map_kv_btf(struct btf *btf) +{ + if (!IS_ERR(btf) && btf != btf_vmlinux) + btf__free(btf); +} + +static void free_btf_vmlinux(void) +{ + if (!IS_ERR(btf_vmlinux)) + btf__free(btf_vmlinux); } static int map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, - bool enable_btf, bool show_header) + bool show_header) { void *key, *value, *prev_key; unsigned int num_elems = 0; @@ -962,18 +992,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, prev_key = NULL; - if (enable_btf) { - err = btf__get_from_id(info->btf_id, &btf); - if (err || !btf) { - /* enable_btf is true only if we've already checked - * that all maps have BTF information. - */ - p_err("failed to get btf"); + if (wtr) { + btf = get_map_kv_btf(info); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); goto exit_free; } - } - if (wtr) { if (show_header) { jsonw_start_object(wtr); /* map object */ show_map_header_json(info, wtr); @@ -1012,7 +1037,7 @@ exit_free: free(key); free(value); close(fd); - btf__free(btf); + free_map_kv_btf(btf); return err; } @@ -1021,7 +1046,7 @@ static int do_dump(int argc, char **argv) { json_writer_t *wtr = NULL, *btf_wtr = NULL; struct bpf_map_info info = {}; - int nb_fds, i = 0, btf = 0; + int nb_fds, i = 0; __u32 len = sizeof(info); int *fds = NULL; int err = -1; @@ -1041,17 +1066,17 @@ static int do_dump(int argc, char **argv) if (json_output) { wtr = json_wtr; } else { - btf = maps_have_btf(fds, nb_fds); - if (btf < 0) + int do_plain_btf; + + do_plain_btf = maps_have_btf(fds, nb_fds); + if (do_plain_btf < 0) goto exit_close; - if (btf) { + + if (do_plain_btf) { btf_wtr = get_btf_writer(); - if (btf_wtr) { - wtr = btf_wtr; - } else { + wtr = btf_wtr; + if (!btf_wtr) p_info("failed to create json writer for btf. falling back to plain output"); - btf = 0; - } } } @@ -1062,7 +1087,7 @@ static int do_dump(int argc, char **argv) p_err("can't get map info: %s", strerror(errno)); break; } - err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1); + err = map_dump(fds[i], &info, wtr, nb_fds > 1); if (!wtr && i != nb_fds - 1) printf("\n"); @@ -1073,13 +1098,14 @@ static int do_dump(int argc, char **argv) if (wtr && nb_fds > 1) jsonw_end_array(wtr); /* root array */ - if (btf) + if (btf_wtr) jsonw_destroy(&btf_wtr); exit_close: for (; i < nb_fds; i++) close(fds[i]); exit_free: free(fds); + free_btf_vmlinux(); return err; } diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 4c5531d1a450..d9b29c17fbb8 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -6,7 +6,7 @@ */ #include <errno.h> #include <fcntl.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include <poll.h> #include <signal.h> #include <stdbool.h> @@ -21,7 +21,7 @@ #include <sys/mman.h> #include <sys/syscall.h> -#include <bpf.h> +#include <bpf/bpf.h> #include <perf-sys.h> #include "main.h" diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index d93bee298e54..c5e3895b7c8b 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -7,7 +7,8 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include <net/if.h> #include <linux/if.h> #include <linux/rtnetlink.h> @@ -16,9 +17,8 @@ #include <sys/stat.h> #include <sys/types.h> -#include <bpf.h> -#include <nlattr.h> -#include "libbpf_internal.h" +#include "bpf/nlattr.h" +#include "bpf/libbpf_internal.h" #include "main.h" #include "netlink_dumper.h" diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c index 550a0f537eed..5f65140b003b 100644 --- a/tools/bpf/bpftool/netlink_dumper.c +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -3,11 +3,11 @@ #include <stdlib.h> #include <string.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include <linux/rtnetlink.h> #include <linux/tc_act/tc_bpf.h> -#include <nlattr.h> +#include "bpf/nlattr.h" #include "main.h" #include "netlink_dumper.h" diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index b2046f33e23f..3341aa14acda 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -13,7 +13,7 @@ #include <unistd.h> #include <ftw.h> -#include <bpf.h> +#include <bpf/bpf.h> #include "main.h" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 47a61ac42dc0..a3521deca869 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -17,9 +17,9 @@ #include <linux/err.h> #include <linux/sizes.h> -#include <bpf.h> -#include <btf.h> -#include <libbpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include <bpf/libbpf.h> #include "cfg.h" #include "main.h" diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 5b91ee65a080..8608cd68cdd0 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -7,7 +7,7 @@ #include <stdlib.h> #include <string.h> #include <sys/types.h> -#include <libbpf.h> +#include <bpf/libbpf.h> #include "disasm.h" #include "json_writer.h" diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore new file mode 100644 index 000000000000..90a456a2a72f --- /dev/null +++ b/tools/bpf/runqslower/.gitignore @@ -0,0 +1 @@ +/.output diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile new file mode 100644 index 000000000000..faf5418609ea --- /dev/null +++ b/tools/bpf/runqslower/Makefile @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +OUTPUT := .output +CLANG := clang +LLC := llc +LLVM_STRIP := llvm-strip +DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +LIBBPF_SRC := $(abspath ../../lib/bpf) +BPFOBJ := $(OUTPUT)/libbpf.a +BPF_INCLUDE := $(OUTPUT) +INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib) +CFLAGS := -g -Wall + +# Try to detect best kernel BTF source +KERNEL_REL := $(shell uname -r) +VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL) +VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ + $(wildcard $(VMLINUX_BTF_PATHS)))) + +abs_out := $(abspath $(OUTPUT)) +ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; +MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 +endif + +.DELETE_ON_ERROR: + +.PHONY: all clean runqslower +all: runqslower + +runqslower: $(OUTPUT)/runqslower + +clean: + $(call msg,CLEAN) + $(Q)rm -rf $(OUTPUT) runqslower + +$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ) + $(call msg,BINARY,$@) + $(Q)$(CC) $(CFLAGS) -lelf -lz $^ -o $@ + +$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ + $(OUTPUT)/runqslower.bpf.o + +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h + +$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) + $(call msg,GEN-SKEL,$@) + $(Q)$(BPFTOOL) gen skeleton $< > $@ + +$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT) + $(call msg,BPF,$@) + $(Q)$(CLANG) -g -O2 -target bpf $(INCLUDES) \ + -c $(filter %.c,$^) -o $@ && \ + $(LLVM_STRIP) -g $@ + +$(OUTPUT)/%.o: %.c | $(OUTPUT) + $(call msg,CC,$@) + $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ + +$(OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $(OUTPUT) + +$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL) + $(call msg,GEN,$@) + $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \ + echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \ + "specify its location." >&2; \ + exit 1;\ + fi + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@ + +$(BPFOBJ): | $(OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ + OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + +$(DEFAULT_BPFTOOL): + $(Q)$(MAKE) $(submake_extras) -C ../bpftool \ + prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c new file mode 100644 index 000000000000..48a39f72fadf --- /dev/null +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "runqslower.h" + +#define TASK_RUNNING 0 + +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK + +const volatile __u64 min_us = 0; +const volatile pid_t targ_pid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, u32); + __type(value, u64); +} start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +/* record enqueue timestamp */ +__always_inline +static int trace_enqueue(u32 tgid, u32 pid) +{ + u64 ts; + + if (!pid || (targ_pid && targ_pid != pid)) + return 0; + + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&start, &pid, &ts, 0); + return 0; +} + +SEC("tp_btf/sched_wakeup") +int handle__sched_wakeup(u64 *ctx) +{ + /* TP_PROTO(struct task_struct *p) */ + struct task_struct *p = (void *)ctx[0]; + + return trace_enqueue(p->tgid, p->pid); +} + +SEC("tp_btf/sched_wakeup_new") +int handle__sched_wakeup_new(u64 *ctx) +{ + /* TP_PROTO(struct task_struct *p) */ + struct task_struct *p = (void *)ctx[0]; + + return trace_enqueue(p->tgid, p->pid); +} + +SEC("tp_btf/sched_switch") +int handle__sched_switch(u64 *ctx) +{ + /* TP_PROTO(bool preempt, struct task_struct *prev, + * struct task_struct *next) + */ + struct task_struct *prev = (struct task_struct *)ctx[1]; + struct task_struct *next = (struct task_struct *)ctx[2]; + struct event event = {}; + u64 *tsp, delta_us; + long state; + u32 pid; + + /* ivcsw: treat like an enqueue event and store timestamp */ + if (prev->state == TASK_RUNNING) + trace_enqueue(prev->tgid, prev->pid); + + pid = next->pid; + + /* fetch timestamp and calculate delta */ + tsp = bpf_map_lookup_elem(&start, &pid); + if (!tsp) + return 0; /* missed enqueue */ + + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + if (min_us && delta_us <= min_us) + return 0; + + event.pid = pid; + event.delta_us = delta_us; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + /* output */ + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + + bpf_map_delete_elem(&start, &pid); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c new file mode 100644 index 000000000000..d89715844952 --- /dev/null +++ b/tools/bpf/runqslower/runqslower.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook +#include <argp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <time.h> +#include <bpf/libbpf.h> +#include <bpf/bpf.h> +#include "runqslower.h" +#include "runqslower.skel.h" + +struct env { + pid_t pid; + __u64 min_us; + bool verbose; +} env = { + .min_us = 10000, +}; + +const char *argp_program_version = "runqslower 0.1"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = +"runqslower Trace long process scheduling delays.\n" +" For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n" +"\n" +"This script traces high scheduling delays between tasks being\n" +"ready to run and them running on CPU after that.\n" +"\n" +"USAGE: runqslower [-p PID] [min_us]\n" +"\n" +"EXAMPLES:\n" +" runqslower # trace run queue latency higher than 10000 us (default)\n" +" runqslower 1000 # trace run queue latency higher than 1000 us\n" +" runqslower -p 123 # trace pid 123 only\n"; + +static const struct argp_option opts[] = { + { "pid", 'p', "PID", 0, "Process PID to trace"}, + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + static int pos_args; + int pid; + long long min_us; + + switch (key) { + case 'v': + env.verbose = true; + break; + case 'p': + errno = 0; + pid = strtol(arg, NULL, 10); + if (errno || pid <= 0) { + fprintf(stderr, "Invalid PID: %s\n", arg); + argp_usage(state); + } + env.pid = pid; + break; + case ARGP_KEY_ARG: + if (pos_args++) { + fprintf(stderr, + "Unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + errno = 0; + min_us = strtoll(arg, NULL, 10); + if (errno || min_us <= 0) { + fprintf(stderr, "Invalid delay (in us): %s\n", arg); + argp_usage(state); + } + env.min_us = min_us; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static int bump_memlock_rlimit(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + const struct event *e = data; + struct tm *tm; + char ts[32]; + time_t t; + + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us); +} + +void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) +{ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); +} + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct perf_buffer_opts pb_opts; + struct perf_buffer *pb = NULL; + struct runqslower_bpf *obj; + int err; + + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + libbpf_set_print(libbpf_print_fn); + + err = bump_memlock_rlimit(); + if (err) { + fprintf(stderr, "failed to increase rlimit: %d", err); + return 1; + } + + obj = runqslower_bpf__open(); + if (!obj) { + fprintf(stderr, "failed to open and/or load BPF object\n"); + return 1; + } + + /* initialize global data (filtering options) */ + obj->rodata->targ_pid = env.pid; + obj->rodata->min_us = env.min_us; + + err = runqslower_bpf__load(obj); + if (err) { + fprintf(stderr, "failed to load BPF object: %d\n", err); + goto cleanup; + } + + err = runqslower_bpf__attach(obj); + if (err) { + fprintf(stderr, "failed to attach BPF programs\n"); + goto cleanup; + } + + printf("Tracing run queue latency higher than %llu us\n", env.min_us); + printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); + + pb_opts.sample_cb = handle_event; + pb_opts.lost_cb = handle_lost_events; + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); + err = libbpf_get_error(pb); + if (err) { + pb = NULL; + fprintf(stderr, "failed to open perf buffer: %d\n", err); + goto cleanup; + } + + while ((err = perf_buffer__poll(pb, 100)) >= 0) + ; + printf("Error polling perf buffer: %d\n", err); + +cleanup: + perf_buffer__free(pb); + runqslower_bpf__destroy(obj); + + return err != 0; +} diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h new file mode 100644 index 000000000000..9db225425e5f --- /dev/null +++ b/tools/bpf/runqslower/runqslower.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __RUNQSLOWER_H +#define __RUNQSLOWER_H + +#define TASK_COMM_LEN 16 + +struct event { + char task[TASK_COMM_LEN]; + __u64 delta_us; + pid_t pid; +}; + +#endif /* __RUNQSLOWER_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7df436da542d..f1d74a2bd234 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -107,6 +107,10 @@ enum bpf_cmd { BPF_MAP_LOOKUP_AND_DELETE_ELEM, BPF_MAP_FREEZE, BPF_BTF_GET_NEXT_ID, + BPF_MAP_LOOKUP_BATCH, + BPF_MAP_LOOKUP_AND_DELETE_BATCH, + BPF_MAP_UPDATE_BATCH, + BPF_MAP_DELETE_BATCH, }; enum bpf_map_type { @@ -136,6 +140,7 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, BPF_MAP_TYPE_DEVMAP_HASH, + BPF_MAP_TYPE_STRUCT_OPS, }; /* Note that tracing related programs such as @@ -174,6 +179,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_PROG_TYPE_TRACING, + BPF_PROG_TYPE_STRUCT_OPS, + BPF_PROG_TYPE_EXT, }; enum bpf_attach_type { @@ -357,7 +364,12 @@ enum bpf_attach_type { /* Enable memory-mapping BPF map */ #define BPF_F_MMAPABLE (1U << 10) -/* flags for BPF_PROG_QUERY */ +/* Flags for BPF_PROG_QUERY. */ + +/* Query effective (directly attached + inherited from ancestor cgroups) + * programs that will be executed for events within a cgroup. + * attach_flags with this flag are returned only for directly attached programs. + */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) enum bpf_stack_build_id_status { @@ -397,6 +409,10 @@ union bpf_attr { __u32 btf_fd; /* fd pointing to a BTF type data */ __u32 btf_key_type_id; /* BTF type_id of the key */ __u32 btf_value_type_id; /* BTF type_id of the value */ + __u32 btf_vmlinux_value_type_id;/* BTF type_id of a kernel- + * struct stored as the + * map value + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -409,6 +425,23 @@ union bpf_attr { __u64 flags; }; + struct { /* struct used by BPF_MAP_*_BATCH commands */ + __aligned_u64 in_batch; /* start batch, + * NULL to start from beginning + */ + __aligned_u64 out_batch; /* output: next start batch */ + __aligned_u64 keys; + __aligned_u64 values; + __u32 count; /* input/output: + * input: # of key/value + * elements + * output: # of filled elements + */ + __u32 map_fd; + __u64 elem_flags; + __u64 flags; + } batch; + struct { /* anonymous struct used by BPF_PROG_LOAD command */ __u32 prog_type; /* one of enum bpf_prog_type */ __u32 insn_cnt; @@ -2703,7 +2736,8 @@ union bpf_attr { * * int bpf_send_signal(u32 sig) * Description - * Send signal *sig* to the current task. + * Send signal *sig* to the process of the current task. + * The signal may be delivered to any of this process's threads. * Return * 0 on success or successfully queued. * @@ -2831,6 +2865,33 @@ union bpf_attr { * Return * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. + * + * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * Description + * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock. + * *rcv_nxt* is the ack_seq to be sent out. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_send_signal_thread(u32 sig) + * Description + * Send signal *sig* to the thread corresponding to the current task. + * Return + * 0 on success or successfully queued. + * + * **-EBUSY** if work queue under nmi is full. + * + * **-EINVAL** if *sig* is invalid. + * + * **-EPERM** if no permission to send the *sig*. + * + * **-EAGAIN** if bpf program can try again. + * + * u64 bpf_jiffies64(void) + * Description + * Obtain the 64bit jiffies + * Return + * The 64 bit jiffies */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2948,7 +3009,10 @@ union bpf_attr { FN(probe_read_user), \ FN(probe_read_kernel), \ FN(probe_read_user_str), \ - FN(probe_read_kernel_str), + FN(probe_read_kernel_str), \ + FN(tcp_send_ack), \ + FN(send_signal_thread), \ + FN(jiffies64), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3349,7 +3413,7 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; - __u32 :32; + __u32 btf_vmlinux_value_type_id; __u64 netns_dev; __u64 netns_ino; __u32 btf_id; diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 1a2898c482ee..5a667107ad2c 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -146,6 +146,12 @@ enum { BTF_VAR_GLOBAL_EXTERN = 2, }; +enum btf_func_linkage { + BTF_FUNC_STATIC = 0, + BTF_FUNC_GLOBAL = 1, + BTF_FUNC_EXTERN = 2, +}; + /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe * additional information related to the variable such as its linkage. */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8aec8769d944..024af2d1d0af 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -169,6 +169,7 @@ enum { IFLA_MAX_MTU, IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ + IFLA_PERM_ADDRESS, __IFLA_MAX }; @@ -485,6 +486,13 @@ enum macsec_validation_type { MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1, }; +enum macsec_offload { + MACSEC_OFFLOAD_OFF = 0, + MACSEC_OFFLOAD_PHY = 1, + __MACSEC_OFFLOAD_END, + MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1, +}; + /* IPVLAN section */ enum { IFLA_IPVLAN_UNSPEC, diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index d87830e7ea63..aee7f1a83c77 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -183,7 +183,7 @@ $(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) @@ -273,10 +273,11 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ - *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ - $(SHARED_OBJDIR) $(STATIC_OBJDIR) + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ + *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ + $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ + $(addprefix $(OUTPUT), \ + *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a787d53699c8..c6dafe563176 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,6 +32,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -95,7 +98,11 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) attr.btf_key_type_id = create_attr->btf_key_type_id; attr.btf_value_type_id = create_attr->btf_value_type_id; attr.map_ifindex = create_attr->map_ifindex; - attr.inner_map_fd = create_attr->inner_map_fd; + if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) + attr.btf_vmlinux_value_type_id = + create_attr->btf_vmlinux_value_type_id; + else + attr.inner_map_fd = create_attr->inner_map_fd; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } @@ -228,7 +235,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; - if (attr.prog_type == BPF_PROG_TYPE_TRACING) { + if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) { + attr.attach_btf_id = load_attr->attach_btf_id; + } else if (attr.prog_type == BPF_PROG_TYPE_TRACING || + attr.prog_type == BPF_PROG_TYPE_EXT) { attr.attach_btf_id = load_attr->attach_btf_id; attr.attach_prog_fd = load_attr->attach_prog_fd; } else { @@ -443,6 +453,64 @@ int bpf_map_freeze(int fd) return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); } +static int bpf_map_batch_common(int cmd, int fd, void *in_batch, + void *out_batch, void *keys, void *values, + __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + union bpf_attr attr; + int ret; + + if (!OPTS_VALID(opts, bpf_map_batch_opts)) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.batch.map_fd = fd; + attr.batch.in_batch = ptr_to_u64(in_batch); + attr.batch.out_batch = ptr_to_u64(out_batch); + attr.batch.keys = ptr_to_u64(keys); + attr.batch.values = ptr_to_u64(values); + attr.batch.count = *count; + attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); + attr.batch.flags = OPTS_GET(opts, flags, 0); + + ret = sys_bpf(cmd, &attr, sizeof(attr)); + *count = attr.batch.count; + + return ret; +} + +int bpf_map_delete_batch(int fd, void *keys, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, + NULL, keys, NULL, count, opts); +} + +int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch, + out_batch, keys, values, count, opts); +} + +int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH, + fd, in_batch, out_batch, keys, values, + count, opts); +} + +int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts) +{ + return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, + keys, values, count, opts); +} + int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index f0ab8519986e..b976e77316cc 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -46,7 +46,10 @@ struct bpf_create_map_attr { __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 map_ifindex; - __u32 inner_map_fd; + union { + __u32 inner_map_fd; + __u32 btf_vmlinux_value_type_id; + }; }; LIBBPF_API int @@ -124,6 +127,28 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); + +struct bpf_map_batch_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u64 elem_flags; + __u64 flags; +}; +#define bpf_map_batch_opts__last_field flags + +LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, + void *keys, void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, + void *out_batch, void *keys, + void *values, __u32 *count, + const struct bpf_map_batch_opts *opts); +LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + __u32 *count, + const struct bpf_map_batch_opts *opts); + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 3ed1a27b5f7c..bafca49cb1e6 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -8,6 +8,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + struct bpf_prog_linfo { void *raw_linfo; void *raw_jited_linfo; diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 5f04f56e1eb6..3d1c25fc97ae 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -8,6 +8,10 @@ #include <fcntl.h> #include <unistd.h> #include <errno.h> +#include <sys/utsname.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> #include <gelf.h> @@ -17,8 +21,11 @@ #include "libbpf_internal.h" #include "hashmap.h" -#define BTF_MAX_NR_TYPES 0x7fffffff -#define BTF_MAX_STR_OFFSET 0x7fffffff +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +#define BTF_MAX_NR_TYPES 0x7fffffffU +#define BTF_MAX_STR_OFFSET 0x7fffffffU static struct btf_type btf_void; @@ -50,7 +57,7 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) if (btf->types_size == BTF_MAX_NR_TYPES) return -E2BIG; - expand_by = max(btf->types_size >> 2, 16); + expand_by = max(btf->types_size >> 2, 16U); new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); new_types = realloc(btf->types, sizeof(*new_types) * new_size); @@ -286,7 +293,7 @@ int btf__align_of(const struct btf *btf, __u32 id) switch (kind) { case BTF_KIND_INT: case BTF_KIND_ENUM: - return min(sizeof(void *), t->size); + return min(sizeof(void *), (size_t)t->size); case BTF_KIND_PTR: return sizeof(void *); case BTF_KIND_TYPEDEF: @@ -1398,7 +1405,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d, if (d->hypot_cnt == d->hypot_cap) { __u32 *new_list; - d->hypot_cap += max(16, d->hypot_cap / 2); + d->hypot_cap += max((size_t)16, d->hypot_cap / 2); new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); if (!new_list) return -ENOMEM; @@ -1694,7 +1701,7 @@ static int btf_dedup_strings(struct btf_dedup *d) if (strs.cnt + 1 > strs.cap) { struct btf_str_ptr *new_ptrs; - strs.cap += max(strs.cnt / 2, 16); + strs.cap += max(strs.cnt / 2, 16U); new_ptrs = realloc(strs.ptrs, sizeof(strs.ptrs[0]) * strs.cap); if (!new_ptrs) { @@ -2928,3 +2935,89 @@ static int btf_dedup_remap_types(struct btf_dedup *d) } return 0; } + +static struct btf *btf_load_raw(const char *path) +{ + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + +cleanup: + free(data); + return btf; +} + +/* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ +struct btf *libbpf_find_kernel_btf(void) +{ + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warn("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 8d73f7f5551f..70c1b7ec2bd0 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -102,6 +102,8 @@ LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + struct btf_dedup_opts { unsigned int dedup_table_size; bool dont_resolve_fwds; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index e95f7710f210..bd09ed1710f1 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -18,6 +18,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -139,6 +142,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf, if (IS_ERR(d->type_names)) { err = PTR_ERR(d->type_names); d->type_names = NULL; + goto err; } d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); if (IS_ERR(d->ident_names)) { diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 6122272943e6..54c30c802070 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -12,6 +12,9 @@ #include <linux/err.h> #include "hashmap.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7513165b104f..ae34b681ae82 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -55,6 +55,9 @@ #include "libbpf_internal.h" #include "hashmap.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef EM_BPF #define EM_BPF 247 #endif @@ -70,6 +73,12 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) +static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); +static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj, + int idx); +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); + static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { @@ -166,6 +175,8 @@ struct bpf_capabilities { __u32 btf_datasec:1; /* BPF_F_MMAPABLE is supported for arrays */ __u32 array_mmap:1; + /* BTF_FUNC_GLOBAL is supported */ + __u32 btf_func_global:1; }; enum reloc_type { @@ -229,10 +240,32 @@ struct bpf_program { __u32 prog_flags; }; +struct bpf_struct_ops { + const char *tname; + const struct btf_type *type; + struct bpf_program **progs; + __u32 *kern_func_off; + /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */ + void *data; + /* e.g. struct bpf_struct_ops_tcp_congestion_ops in + * btf_vmlinux's format. + * struct bpf_struct_ops_tcp_congestion_ops { + * [... some other kernel fields ...] + * struct tcp_congestion_ops data; + * } + * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops) + * bpf_map__init_kern_struct_ops() will populate the "kern_vdata" + * from "data". + */ + void *kern_vdata; + __u32 type_id; +}; + #define DATA_SEC ".data" #define BSS_SEC ".bss" #define RODATA_SEC ".rodata" #define KCONFIG_SEC ".kconfig" +#define STRUCT_OPS_SEC ".struct_ops" enum libbpf_map_type { LIBBPF_MAP_UNSPEC, @@ -259,10 +292,12 @@ struct bpf_map { struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; + __u32 btf_vmlinux_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; void *mmaped; + struct bpf_struct_ops *st_ops; char *pin_path; bool pinned; bool reused; @@ -326,6 +361,7 @@ struct bpf_object { Elf_Data *data; Elf_Data *rodata; Elf_Data *bss; + Elf_Data *st_ops_data; size_t strtabidx; struct { GElf_Shdr shdr; @@ -339,6 +375,7 @@ struct bpf_object { int data_shndx; int rodata_shndx; int bss_shndx; + int st_ops_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -348,6 +385,10 @@ struct bpf_object { struct list_head list; struct btf *btf; + /* Parse and load BTF vmlinux if any of the programs in the object need + * it at load time. + */ + struct btf *btf_vmlinux; struct btf_ext *btf_ext; void *priv; @@ -566,6 +607,348 @@ static __u32 get_kernel_version(void) return KERNEL_VERSION(major, minor, patch); } +static const struct btf_member * +find_member_by_offset(const struct btf_type *t, __u32 bit_offset) +{ + struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + if (btf_member_bit_offset(t, i) == bit_offset) + return m; + } + + return NULL; +} + +static const struct btf_member * +find_member_by_name(const struct btf *btf, const struct btf_type *t, + const char *name) +{ + struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + if (!strcmp(btf__name_by_offset(btf, m->name_off), name)) + return m; + } + + return NULL; +} + +#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" +static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, + const char *name, __u32 kind); + +static int +find_struct_ops_kern_types(const struct btf *btf, const char *tname, + const struct btf_type **type, __u32 *type_id, + const struct btf_type **vtype, __u32 *vtype_id, + const struct btf_member **data_member) +{ + const struct btf_type *kern_type, *kern_vtype; + const struct btf_member *kern_data_member; + __s32 kern_vtype_id, kern_type_id; + __u32 i; + + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (kern_type_id < 0) { + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", + tname); + return kern_type_id; + } + kern_type = btf__type_by_id(btf, kern_type_id); + + /* Find the corresponding "map_value" type that will be used + * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, + * find "struct bpf_struct_ops_tcp_congestion_ops" from the + * btf_vmlinux. + */ + kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, + tname, BTF_KIND_STRUCT); + if (kern_vtype_id < 0) { + pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", + STRUCT_OPS_VALUE_PREFIX, tname); + return kern_vtype_id; + } + kern_vtype = btf__type_by_id(btf, kern_vtype_id); + + /* Find "struct tcp_congestion_ops" from + * struct bpf_struct_ops_tcp_congestion_ops { + * [ ... ] + * struct tcp_congestion_ops data; + * } + */ + kern_data_member = btf_members(kern_vtype); + for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) { + if (kern_data_member->type == kern_type_id) + break; + } + if (i == btf_vlen(kern_vtype)) { + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", + tname, STRUCT_OPS_VALUE_PREFIX, tname); + return -EINVAL; + } + + *type = kern_type; + *type_id = kern_type_id; + *vtype = kern_vtype; + *vtype_id = kern_vtype_id; + *data_member = kern_data_member; + + return 0; +} + +static bool bpf_map__is_struct_ops(const struct bpf_map *map) +{ + return map->def.type == BPF_MAP_TYPE_STRUCT_OPS; +} + +/* Init the map's fields that depend on kern_btf */ +static int bpf_map__init_kern_struct_ops(struct bpf_map *map, + const struct btf *btf, + const struct btf *kern_btf) +{ + const struct btf_member *member, *kern_member, *kern_data_member; + const struct btf_type *type, *kern_type, *kern_vtype; + __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_struct_ops *st_ops; + void *data, *kern_data; + const char *tname; + int err; + + st_ops = map->st_ops; + type = st_ops->type; + tname = st_ops->tname; + err = find_struct_ops_kern_types(kern_btf, tname, + &kern_type, &kern_type_id, + &kern_vtype, &kern_vtype_id, + &kern_data_member); + if (err) + return err; + + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", + map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + + map->def.value_size = kern_vtype->size; + map->btf_vmlinux_value_type_id = kern_vtype_id; + + st_ops->kern_vdata = calloc(1, kern_vtype->size); + if (!st_ops->kern_vdata) + return -ENOMEM; + + data = st_ops->data; + kern_data_off = kern_data_member->offset / 8; + kern_data = st_ops->kern_vdata + kern_data_off; + + member = btf_members(type); + for (i = 0; i < btf_vlen(type); i++, member++) { + const struct btf_type *mtype, *kern_mtype; + __u32 mtype_id, kern_mtype_id; + void *mdata, *kern_mdata; + __s64 msize, kern_msize; + __u32 moff, kern_moff; + __u32 kern_member_idx; + const char *mname; + + mname = btf__name_by_offset(btf, member->name_off); + kern_member = find_member_by_name(kern_btf, kern_type, mname); + if (!kern_member) { + pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n", + map->name, mname); + return -ENOTSUP; + } + + kern_member_idx = kern_member - btf_members(kern_type); + if (btf_member_bitfield_size(type, i) || + btf_member_bitfield_size(kern_type, kern_member_idx)) { + pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n", + map->name, mname); + return -ENOTSUP; + } + + moff = member->offset / 8; + kern_moff = kern_member->offset / 8; + + mdata = data + moff; + kern_mdata = kern_data + kern_moff; + + mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id); + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type, + &kern_mtype_id); + if (BTF_INFO_KIND(mtype->info) != + BTF_INFO_KIND(kern_mtype->info)) { + pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n", + map->name, mname, BTF_INFO_KIND(mtype->info), + BTF_INFO_KIND(kern_mtype->info)); + return -ENOTSUP; + } + + if (btf_is_ptr(mtype)) { + struct bpf_program *prog; + + mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + kern_mtype = skip_mods_and_typedefs(kern_btf, + kern_mtype->type, + &kern_mtype_id); + if (!btf_is_func_proto(mtype) || + !btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + map->name, mname); + return -ENOTSUP; + } + + prog = st_ops->progs[i]; + if (!prog) { + pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", + map->name, mname); + continue; + } + + prog->attach_btf_id = kern_type_id; + prog->expected_attach_type = kern_member_idx; + + st_ops->kern_func_off[i] = kern_data_off + kern_moff; + + pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n", + map->name, mname, prog->name, moff, + kern_moff); + + continue; + } + + msize = btf__resolve_size(btf, mtype_id); + kern_msize = btf__resolve_size(kern_btf, kern_mtype_id); + if (msize < 0 || kern_msize < 0 || msize != kern_msize) { + pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n", + map->name, mname, (ssize_t)msize, + (ssize_t)kern_msize); + return -ENOTSUP; + } + + pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n", + map->name, mname, (unsigned int)msize, + moff, kern_moff); + memcpy(kern_mdata, mdata, msize); + } + + return 0; +} + +static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) +{ + struct bpf_map *map; + size_t i; + int err; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!bpf_map__is_struct_ops(map)) + continue; + + err = bpf_map__init_kern_struct_ops(map, obj->btf, + obj->btf_vmlinux); + if (err) + return err; + } + + return 0; +} + +static int bpf_object__init_struct_ops_maps(struct bpf_object *obj) +{ + const struct btf_type *type, *datasec; + const struct btf_var_secinfo *vsi; + struct bpf_struct_ops *st_ops; + const char *tname, *var_name; + __s32 type_id, datasec_id; + const struct btf *btf; + struct bpf_map *map; + __u32 i; + + if (obj->efile.st_ops_shndx == -1) + return 0; + + btf = obj->btf; + datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC, + BTF_KIND_DATASEC); + if (datasec_id < 0) { + pr_warn("struct_ops init: DATASEC %s not found\n", + STRUCT_OPS_SEC); + return -EINVAL; + } + + datasec = btf__type_by_id(btf, datasec_id); + vsi = btf_var_secinfos(datasec); + for (i = 0; i < btf_vlen(datasec); i++, vsi++) { + type = btf__type_by_id(obj->btf, vsi->type); + var_name = btf__name_by_offset(obj->btf, type->name_off); + + type_id = btf__resolve_type(obj->btf, vsi->type); + if (type_id < 0) { + pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n", + vsi->type, STRUCT_OPS_SEC); + return -EINVAL; + } + + type = btf__type_by_id(obj->btf, type_id); + tname = btf__name_by_offset(obj->btf, type->name_off); + if (!tname[0]) { + pr_warn("struct_ops init: anonymous type is not supported\n"); + return -ENOTSUP; + } + if (!btf_is_struct(type)) { + pr_warn("struct_ops init: %s is not a struct\n", tname); + return -EINVAL; + } + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + + map->sec_idx = obj->efile.st_ops_shndx; + map->sec_offset = vsi->offset; + map->name = strdup(var_name); + if (!map->name) + return -ENOMEM; + + map->def.type = BPF_MAP_TYPE_STRUCT_OPS; + map->def.key_size = sizeof(int); + map->def.value_size = type->size; + map->def.max_entries = 1; + + map->st_ops = calloc(1, sizeof(*map->st_ops)); + if (!map->st_ops) + return -ENOMEM; + st_ops = map->st_ops; + st_ops->data = malloc(type->size); + st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs)); + st_ops->kern_func_off = malloc(btf_vlen(type) * + sizeof(*st_ops->kern_func_off)); + if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off) + return -ENOMEM; + + if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) { + pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n", + var_name, STRUCT_OPS_SEC); + return -EINVAL; + } + + memcpy(st_ops->data, + obj->efile.st_ops_data->d_buf + vsi->offset, + type->size); + st_ops->tname = tname; + st_ops->type = type; + st_ops->type_id = type_id; + + pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n", + tname, type_id, var_name, vsi->offset); + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, const void *obj_buf, size_t obj_buf_sz, @@ -607,6 +990,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.data_shndx = -1; obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); @@ -630,6 +1014,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.data = NULL; obj->efile.rodata = NULL; obj->efile.bss = NULL; + obj->efile.st_ops_data = NULL; zfree(&obj->efile.reloc_sects); obj->efile.nr_reloc_sects = 0; @@ -735,16 +1120,6 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) return 0; } -static int compare_bpf_map(const void *_a, const void *_b) -{ - const struct bpf_map *a = _a; - const struct bpf_map *b = _b; - - if (a->sec_idx != b->sec_idx) - return a->sec_idx - b->sec_idx; - return a->sec_offset - b->sec_offset; -} - static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) { if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -815,6 +1190,9 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, } else if (!strcmp(name, RODATA_SEC)) { if (obj->efile.rodata) *size = obj->efile.rodata->d_size; + } else if (!strcmp(name, STRUCT_OPS_SEC)) { + if (obj->efile.st_ops_data) + *size = obj->efile.st_ops_data->d_size; } else { ret = bpf_object_search_section_size(obj, name, &d_size); if (!ret) @@ -898,7 +1276,7 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map) long page_sz = sysconf(_SC_PAGE_SIZE); size_t map_sz; - map_sz = roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; map_sz = roundup(map_sz, page_sz); return map_sz; } @@ -1440,6 +1818,20 @@ skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) return t; } +static const struct btf_type * +resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) +{ + const struct btf_type *t; + + t = skip_mods_and_typedefs(btf, id, NULL); + if (!btf_is_ptr(t)) + return NULL; + + t = skip_mods_and_typedefs(btf, t->type, res_id); + + return btf_is_func_proto(t) ? t : NULL; +} + /* * Fetch integer attribute of BTF map definition. Such attributes are * represented using a pointer to an array, in which dimensionality of array @@ -1787,13 +2179,10 @@ static int bpf_object__init_maps(struct bpf_object *obj, err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); + err = err ?: bpf_object__init_struct_ops_maps(obj); if (err) return err; - if (obj->nr_maps) { - qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), - compare_bpf_map); - } return 0; } @@ -1817,13 +2206,14 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static void bpf_object__sanitize_btf(struct bpf_object *obj) { + bool has_func_global = obj->caps.btf_func_global; bool has_datasec = obj->caps.btf_datasec; bool has_func = obj->caps.btf_func; struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - if (!obj->btf || (has_func && has_datasec)) + if (!obj->btf || (has_func && has_datasec && has_func_global)) return; for (i = 1; i <= btf__get_nr_types(btf); i++) { @@ -1871,6 +2261,9 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj) } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); + } else if (!has_func_global && btf_is_func(t)) { + /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ + t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); } } } @@ -1889,23 +2282,26 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) { return obj->efile.btf_maps_shndx >= 0 || - obj->nr_extern > 0; + obj->efile.st_ops_shndx >= 0 || + obj->nr_extern > 0; } static int bpf_object__init_btf(struct bpf_object *obj, Elf_Data *btf_data, Elf_Data *btf_ext_data) { - bool btf_required = bpf_object__is_btf_mandatory(obj); - int err = 0; + int err = -ENOENT; if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); if (IS_ERR(obj->btf)) { + err = PTR_ERR(obj->btf); + obj->btf = NULL; pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); goto out; } + err = 0; } if (btf_ext_data) { if (!obj->btf) { @@ -1923,18 +2319,9 @@ static int bpf_object__init_btf(struct bpf_object *obj, } } out: - if (err || IS_ERR(obj->btf)) { - if (btf_required) - err = err ? : PTR_ERR(obj->btf); - else - err = 0; - if (!IS_ERR_OR_NULL(obj->btf)) - btf__free(obj->btf); - obj->btf = NULL; - } - if (btf_required && !obj->btf) { + if (err && bpf_object__is_btf_mandatory(obj)) { pr_warn("BTF is required, but is missing or corrupted.\n"); - return err == 0 ? -ENOENT : err; + return err; } return 0; } @@ -1963,6 +2350,41 @@ static int bpf_object__finalize_btf(struct bpf_object *obj) return 0; } +static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) +{ + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) + return true; + + /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs + * also need vmlinux BTF + */ + if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) + return true; + + return false; +} + +static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) +{ + struct bpf_program *prog; + int err; + + bpf_object__for_each_program(prog, obj) { + if (libbpf_prog_needs_vmlinux_btf(prog)) { + obj->btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(obj->btf_vmlinux)) { + err = PTR_ERR(obj->btf_vmlinux); + pr_warn("Error loading vmlinux BTF: %d\n", err); + obj->btf_vmlinux = NULL; + return err; + } + return 0; + } + } + + return 0; +} + static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { int err = 0; @@ -2088,6 +2510,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, RODATA_SEC) == 0) { obj->efile.rodata = data; obj->efile.rodata_shndx = idx; + } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { + obj->efile.st_ops_data = data; + obj->efile.st_ops_shndx = idx; } else { pr_debug("skip section(%d) %s\n", idx, name); } @@ -2097,7 +2522,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) int sec = sh.sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ - if (!section_have_execinstr(obj, sec)) { + if (!section_have_execinstr(obj, sec) && + strcmp(name, ".rel" STRUCT_OPS_SEC)) { pr_debug("skip relo %s(%d) for section(%d)\n", name, idx, sec); continue; @@ -2599,8 +3025,12 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map) __u32 key_type_id = 0, value_type_id = 0; int ret; - /* if it's BTF-defined map, we don't need to search for type IDs */ - if (map->sec_idx == obj->efile.btf_maps_shndx) + /* if it's BTF-defined map, we don't need to search for type IDs. + * For struct_ops map, it does not need btf_key_type_id and + * btf_value_type_id. + */ + if (map->sec_idx == obj->efile.btf_maps_shndx || + bpf_map__is_struct_ops(map)) return 0; if (!bpf_map__is_internal(map)) { @@ -2804,6 +3234,32 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) return 0; } +static int bpf_object__probe_btf_func_global(struct bpf_object *obj) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + int btf_fd; + + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs)); + if (btf_fd >= 0) { + obj->caps.btf_func_global = 1; + close(btf_fd); + return 1; + } + + return 0; +} + static int bpf_object__probe_btf_datasec(struct bpf_object *obj) { static const char strs[] = "\0x\0.data"; @@ -2859,6 +3315,7 @@ bpf_object__probe_caps(struct bpf_object *obj) bpf_object__probe_name, bpf_object__probe_global_data, bpf_object__probe_btf_func, + bpf_object__probe_btf_func_global, bpf_object__probe_btf_datasec, bpf_object__probe_array_mmap, }; @@ -3025,6 +3482,9 @@ bpf_object__create_maps(struct bpf_object *obj) if (bpf_map_type__is_map_in_map(def->type) && map->inner_map_fd >= 0) create_attr.inner_map_fd = map->inner_map_fd; + if (bpf_map__is_struct_ops(map)) + create_attr.btf_vmlinux_value_type_id = + map->btf_vmlinux_value_type_id; if (obj->btf && !bpf_map_find_btf_info(obj, map)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -3860,92 +4320,6 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, return 0; } -static struct btf *btf_load_raw(const char *path) -{ - struct btf *btf; - size_t read_cnt; - struct stat st; - void *data; - FILE *f; - - if (stat(path, &st)) - return ERR_PTR(-errno); - - data = malloc(st.st_size); - if (!data) - return ERR_PTR(-ENOMEM); - - f = fopen(path, "rb"); - if (!f) { - btf = ERR_PTR(-errno); - goto cleanup; - } - - read_cnt = fread(data, 1, st.st_size, f); - fclose(f); - if (read_cnt < st.st_size) { - btf = ERR_PTR(-EBADF); - goto cleanup; - } - - btf = btf__new(data, read_cnt); - -cleanup: - free(data); - return btf; -} - -/* - * Probe few well-known locations for vmlinux kernel image and try to load BTF - * data out of it to use for target BTF. - */ -static struct btf *bpf_core_find_kernel_btf(void) -{ - struct { - const char *path_fmt; - bool raw_btf; - } locations[] = { - /* try canonical vmlinux BTF through sysfs first */ - { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, - /* fall back to trying to find vmlinux ELF on disk otherwise */ - { "/boot/vmlinux-%1$s" }, - { "/lib/modules/%1$s/vmlinux-%1$s" }, - { "/lib/modules/%1$s/build/vmlinux" }, - { "/usr/lib/modules/%1$s/kernel/vmlinux" }, - { "/usr/lib/debug/boot/vmlinux-%1$s" }, - { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, - { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, - }; - char path[PATH_MAX + 1]; - struct utsname buf; - struct btf *btf; - int i; - - uname(&buf); - - for (i = 0; i < ARRAY_SIZE(locations); i++) { - snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); - - if (access(path, R_OK)) - continue; - - if (locations[i].raw_btf) - btf = btf_load_raw(path); - else - btf = btf__parse_elf(path, NULL); - - pr_debug("loading kernel BTF '%s': %ld\n", - path, IS_ERR(btf) ? PTR_ERR(btf) : 0); - if (IS_ERR(btf)) - continue; - - return btf; - } - - pr_warn("failed to find valid kernel BTF\n"); - return ERR_PTR(-ESRCH); -} - /* Output spec definition in the format: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b @@ -4180,7 +4554,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else - targ_btf = bpf_core_find_kernel_btf(); + targ_btf = libbpf_find_kernel_btf(); if (IS_ERR(targ_btf)) { pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); @@ -4252,13 +4626,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, size_t new_cnt; int err; - if (prog->idx == obj->efile.text_shndx) { - pr_warn("relo in .text insn %d into off %d (insn #%d)\n", - relo->insn_idx, relo->sym_off, relo->sym_off / 8); - return -LIBBPF_ERRNO__RELOC; - } - - if (prog->main_prog_cnt == 0) { + if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); if (!text) { pr_warn("no .text section found yet relo into text exist\n"); @@ -4288,6 +4656,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, text->insns_cnt, text->section_name, prog->section_name); } + insn = &prog->insns[relo->insn_idx]; insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx; return 0; @@ -4367,8 +4736,28 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } + /* ensure .text is relocated first, as it's going to be copied as-is + * later for sub-program calls + */ + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + if (prog->idx != obj->efile.text_shndx) + continue; + + err = bpf_program__relocate(prog, obj); + if (err) { + pr_warn("failed to relocate '%s'\n", prog->section_name); + return err; + } + break; + } + /* now relocate everything but .text, which by now is relocated + * properly, so we can copy raw sub-program instructions as is safely + */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; + if (prog->idx == obj->efile.text_shndx) + continue; err = bpf_program__relocate(prog, obj); if (err) { @@ -4379,6 +4768,10 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return 0; } +static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, + GElf_Shdr *shdr, + Elf_Data *data); + static int bpf_object__collect_reloc(struct bpf_object *obj) { int i, err; @@ -4399,6 +4792,15 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) return -LIBBPF_ERRNO__INTERNAL; } + if (idx == obj->efile.st_ops_shndx) { + err = bpf_object__collect_struct_ops_map_reloc(obj, + shdr, + data); + if (err) + return err; + continue; + } + prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { pr_warn("relocation failed: no section(%d)\n", idx); @@ -4433,7 +4835,10 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; - if (prog->type == BPF_PROG_TYPE_TRACING) { + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { + load_attr.attach_btf_id = prog->attach_btf_id; + } else if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { load_attr.attach_prog_fd = prog->attach_prog_fd; load_attr.attach_btf_id = prog->attach_btf_id; } else { @@ -4508,18 +4913,15 @@ out: return ret; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd); +static int libbpf_find_attach_btf_id(struct bpf_program *prog); int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i, btf_id; - if (prog->type == BPF_PROG_TYPE_TRACING) { - btf_id = libbpf_find_attach_btf_id(prog->section_name, - prog->expected_attach_type, - prog->attach_prog_fd); + if (prog->type == BPF_PROG_TYPE_TRACING || + prog->type == BPF_PROG_TYPE_EXT) { + btf_id = libbpf_find_attach_btf_id(prog); if (btf_id <= 0) return btf_id; prog->attach_btf_id = btf_id; @@ -4679,6 +5081,9 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; + if (prog->type != BPF_PROG_TYPE_UNSPEC) + continue; + err = libbpf_prog_type_by_name(prog->section_name, &prog_type, &attach_type); if (err == -ESRCH) @@ -4689,7 +5094,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_program__set_type(prog, prog_type); bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING) + if (prog_type == BPF_PROG_TYPE_TRACING || + prog_type == BPF_PROG_TYPE_EXT) prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); } @@ -4774,8 +5180,11 @@ int bpf_object__unload(struct bpf_object *obj) if (!obj) return -EINVAL; - for (i = 0; i < obj->nr_maps; i++) + for (i = 0; i < obj->nr_maps; i++) { zclose(obj->maps[i].fd); + if (obj->maps[i].st_ops) + zfree(&obj->maps[i].st_ops->kern_vdata); + } for (i = 0; i < obj->nr_programs; i++) bpf_program__unload(&obj->programs[i]); @@ -4891,9 +5300,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); err = err ? : bpf_object__relocate(obj, attr->target_btf_path); err = err ? : bpf_object__load_progs(obj, attr->log_level); + + btf__free(obj->btf_vmlinux); + obj->btf_vmlinux = NULL; + if (err) goto out; @@ -5478,6 +5893,13 @@ void bpf_object__close(struct bpf_object *obj) map->mmaped = NULL; } + if (map->st_ops) { + zfree(&map->st_ops->data); + zfree(&map->st_ops->progs); + zfree(&map->st_ops->kern_func_off); + zfree(&map->st_ops); + } + zfree(&map->name); zfree(&map->pin_path); } @@ -5746,6 +6168,8 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); +BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); +BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); enum bpf_attach_type bpf_program__get_expected_attach_type(struct bpf_program *prog) @@ -5845,6 +6269,9 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_FEXIT, .is_attach_btf = true, .attach_fn = attach_trace), + SEC_DEF("freplace/", EXT, + .is_attach_btf = true, + .attach_fn = attach_trace), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -5899,6 +6326,7 @@ static const struct bpf_sec_def section_defs[] = { BPF_CGROUP_GETSOCKOPT), BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT), + BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), }; #undef BPF_PROG_SEC_IMPL @@ -5975,34 +6403,182 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, return -ESRCH; } -#define BTF_PREFIX "btf_trace_" +static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, + size_t offset) +{ + struct bpf_map *map; + size_t i; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + if (!bpf_map__is_struct_ops(map)) + continue; + if (map->sec_offset <= offset && + offset - map->sec_offset < map->def.value_size) + return map; + } + + return NULL; +} + +/* Collect the reloc from ELF and populate the st_ops->progs[] */ +static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, + GElf_Shdr *shdr, + Elf_Data *data) +{ + const struct btf_member *member; + struct bpf_struct_ops *st_ops; + struct bpf_program *prog; + unsigned int shdr_idx; + const struct btf *btf; + struct bpf_map *map; + Elf_Data *symbols; + unsigned int moff; + const char *name; + __u32 member_idx; + GElf_Sym sym; + GElf_Rel rel; + int i, nrels; + + symbols = obj->efile.symbols; + btf = obj->btf; + nrels = shdr->sh_size / shdr->sh_entsize; + for (i = 0; i < nrels; i++) { + if (!gelf_getrel(data, i, &rel)) { + pr_warn("struct_ops reloc: failed to get %d reloc\n", i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + pr_warn("struct_ops reloc: symbol %zx not found\n", + (size_t)GELF_R_SYM(rel.r_info)); + return -LIBBPF_ERRNO__FORMAT; + } + + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : "<?>"; + map = find_struct_ops_map_by_offset(obj, rel.r_offset); + if (!map) { + pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", + (size_t)rel.r_offset); + return -EINVAL; + } + + moff = rel.r_offset - map->sec_offset; + shdr_idx = sym.st_shndx; + st_ops = map->st_ops; + pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel.r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", + map->name, + (long long)(rel.r_info >> 32), + (long long)sym.st_value, + shdr_idx, (size_t)rel.r_offset, + map->sec_offset, sym.st_name, name); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warn("struct_ops reloc %s: rel.r_offset %zu shdr_idx %u unsupported non-static function\n", + map->name, (size_t)rel.r_offset, shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + member = find_member_by_offset(st_ops->type, moff * 8); + if (!member) { + pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", + map->name, moff); + return -EINVAL; + } + member_idx = member - btf_members(st_ops->type); + name = btf__name_by_offset(btf, member->name_off); + + if (!resolve_func_ptr(btf, member->type, NULL)) { + pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", + map->name, name); + return -EINVAL; + } + + prog = bpf_object__find_prog_by_idx(obj, shdr_idx); + if (!prog) { + pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", + map->name, shdr_idx, name); + return -EINVAL; + } + + if (prog->type == BPF_PROG_TYPE_UNSPEC) { + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(prog->section_name); + if (sec_def && + sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) { + /* for pr_warn */ + prog->type = sec_def->prog_type; + goto invalid_prog; + } + + prog->type = BPF_PROG_TYPE_STRUCT_OPS; + prog->attach_btf_id = st_ops->type_id; + prog->expected_attach_type = member_idx; + } else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || + prog->attach_btf_id != st_ops->type_id || + prog->expected_attach_type != member_idx) { + goto invalid_prog; + } + st_ops->progs[member_idx] = prog; + } + + return 0; + +invalid_prog: + pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", + map->name, prog->name, prog->section_name, prog->type, + prog->attach_btf_id, prog->expected_attach_type, name); + return -EINVAL; +} + +#define BTF_TRACE_PREFIX "btf_trace_" +#define BTF_MAX_NAME_SIZE 128 + +static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, + const char *name, __u32 kind) +{ + char btf_type_name[BTF_MAX_NAME_SIZE]; + int ret; + + ret = snprintf(btf_type_name, sizeof(btf_type_name), + "%s%s", prefix, name); + /* snprintf returns the number of characters written excluding the + * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it + * indicates truncation. + */ + if (ret < 0 || ret >= sizeof(btf_type_name)) + return -ENAMETOOLONG; + return btf__find_by_name_kind(btf, btf_type_name, kind); +} + +static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, + enum bpf_attach_type attach_type) +{ + int err; + + if (attach_type == BPF_TRACE_RAW_TP) + err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, + BTF_KIND_TYPEDEF); + else + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + + return err; +} + int libbpf_find_vmlinux_btf_id(const char *name, enum bpf_attach_type attach_type) { - struct btf *btf = bpf_core_find_kernel_btf(); - char raw_tp_btf[128] = BTF_PREFIX; - char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; - const char *btf_name; - int err = -EINVAL; - __u32 kind; + struct btf *btf; + btf = libbpf_find_kernel_btf(); if (IS_ERR(btf)) { pr_warn("vmlinux BTF is not found\n"); return -EINVAL; } - if (attach_type == BPF_TRACE_RAW_TP) { - /* prepend "btf_trace_" prefix per kernel convention */ - strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); - btf_name = raw_tp_btf; - kind = BTF_KIND_TYPEDEF; - } else { - btf_name = name; - kind = BTF_KIND_FUNC; - } - err = btf__find_by_name_kind(btf, btf_name, kind); - btf__free(btf); - return err; + return __find_vmlinux_btf_id(btf, name, attach_type); } static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) @@ -6038,10 +6614,11 @@ out: return err; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd) +static int libbpf_find_attach_btf_id(struct bpf_program *prog) { + enum bpf_attach_type attach_type = prog->expected_attach_type; + __u32 attach_prog_fd = prog->attach_prog_fd; + const char *name = prog->section_name; int i, err; if (!name) @@ -6056,8 +6633,9 @@ static int libbpf_find_attach_btf_id(const char *name, err = libbpf_find_prog_btf_id(name + section_defs[i].len, attach_prog_fd); else - err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len, - attach_type); + err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, + name + section_defs[i].len, + attach_type); if (err <= 0) pr_warn("%s is not found in vmlinux BTF\n", name); return err; @@ -6805,6 +7383,58 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog) return sec_def->attach_fn(sec_def, prog); } +static int bpf_link__detach_struct_ops(struct bpf_link *link) +{ + struct bpf_link_fd *l = (void *)link; + __u32 zero = 0; + + if (bpf_map_delete_elem(l->fd, &zero)) + return -errno; + + return 0; +} + +struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) +{ + struct bpf_struct_ops *st_ops; + struct bpf_link_fd *link; + __u32 i, zero = 0; + int err; + + if (!bpf_map__is_struct_ops(map) || map->fd == -1) + return ERR_PTR(-EINVAL); + + link = calloc(1, sizeof(*link)); + if (!link) + return ERR_PTR(-EINVAL); + + st_ops = map->st_ops; + for (i = 0; i < btf_vlen(st_ops->type); i++) { + struct bpf_program *prog = st_ops->progs[i]; + void *kern_data; + int prog_fd; + + if (!prog) + continue; + + prog_fd = bpf_program__fd(prog); + kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; + *(unsigned long *)kern_data = prog_fd; + } + + err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0); + if (err) { + err = -errno; + free(link); + return ERR_PTR(err); + } + + link->link.detach = bpf_link__detach_struct_ops; + link->fd = map->fd; + + return (struct bpf_link *)link; +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index fe592ef48f1b..2a5e3b087002 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -239,6 +239,8 @@ bpf_program__attach_raw_tracepoint(struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_trace(struct bpf_program *prog); +struct bpf_map; +LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_insn; /* @@ -315,6 +317,8 @@ LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, @@ -335,6 +339,8 @@ LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog); /* * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -354,7 +360,6 @@ struct bpf_map_def { * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel, * so no need to worry about a name clash. */ -struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name); @@ -521,6 +526,7 @@ LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); /* * Get bpf_prog_info in continuous memory diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e9713a574243..b035122142bb 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -213,14 +213,25 @@ LIBBPF_0.0.7 { global: btf_dump__emit_type_decl; bpf_link__disconnect; + bpf_map__attach_struct_ops; + bpf_map_delete_batch; + bpf_map_lookup_and_delete_batch; + bpf_map_lookup_batch; + bpf_map_update_batch; bpf_object__find_program_by_name; bpf_object__attach_skeleton; bpf_object__destroy_skeleton; bpf_object__detach_skeleton; bpf_object__load_skeleton; bpf_object__open_skeleton; + bpf_probe_large_insn_limit; bpf_prog_attach_xattr; bpf_program__attach; bpf_program__name; + bpf_program__is_extension; + bpf_program__is_struct_ops; + bpf_program__set_extension; + bpf_program__set_struct_ops; btf__align_of; + libbpf_find_kernel_btf; } LIBBPF_0.0.6; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index 4343e40588c6..0afb51f7a919 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -13,6 +13,9 @@ #include "libbpf.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) #define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) #define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index a9eb8b322671..b782ebef6ac9 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,6 +17,9 @@ #include "libbpf.h" #include "libbpf_internal.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static bool grep(const char *buffer, const char *pattern) { return !!strstr(buffer, pattern); @@ -103,6 +106,8 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_STRUCT_OPS: + case BPF_PROG_TYPE_EXT: default: break; } @@ -251,6 +256,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STRUCT_OPS: default: break; } @@ -321,3 +327,24 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, return res; } + +/* + * Probe for availability of kernel commit (5.3): + * + * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") + */ +bool bpf_probe_large_insn_limit(__u32 ifindex) +{ + struct bpf_insn insns[BPF_MAXINSNS + 1]; + int i; + + for (i = 0; i < BPF_MAXINSNS; i++) + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + + errno = 0; + probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, + ifindex); + + return errno != E2BIG && errno != EINVAL; +} diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 5065c1aa1061..431bd25c6cdb 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -15,6 +15,9 @@ #include "libbpf_internal.h" #include "nlattr.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef SOL_NETLINK #define SOL_NETLINK 270 #endif diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 8db44bbfc66d..0ad41dfea8eb 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -13,6 +13,9 @@ #include <string.h> #include <stdio.h> +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = { [LIBBPF_NLA_U8] = sizeof(uint8_t), [LIBBPF_NLA_U16] = sizeof(uint16_t), diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index b8064eedc177..146da01979c7 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -4,6 +4,9 @@ #include <stdio.h> #include "str_error.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + /* * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl * libc, while checking strerror_r() return to avoid having to check this in diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 8e0ffa800a71..9807903f121e 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -32,6 +32,9 @@ #include "libbpf_internal.h" #include "xsk.h" +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + #ifndef SOL_XDP #define SOL_XDP 283 #endif diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index f3cbf86e51ac..20eed719542e 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1228,8 +1228,10 @@ filter_event(struct tep_event_filter *filter, struct tep_event *event, } filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) + if (filter_type == NULL) { + free_arg(arg); return TEP_ERRNO__MEM_ALLOC_FAILED; + } if (filter_type->filter) free_arg(filter_type->filter); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 387311c67264..de988589d99b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1076,6 +1076,7 @@ int cmd_report(int argc, const char **argv) struct stat st; bool has_br_stack = false; int branch_mode = -1; + int last_key = 0; bool branch_call_mode = false; #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" static const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" @@ -1450,7 +1451,8 @@ repeat: sort_order = sort_tmp; } - if (setup_sorting(session->evlist) < 0) { + if ((last_key != K_SWITCH_INPUT_DATA) && + (setup_sorting(session->evlist) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); if (field_order) @@ -1530,6 +1532,7 @@ repeat: ret = __cmd_report(&report); if (ret == K_SWITCH_INPUT_DATA) { perf_session__delete(session); + last_key = K_SWITCH_INPUT_DATA; goto repeat; } else ret = 0; diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index b9c203219691..49f4f84da485 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,7 +39,7 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> */ -#include <bpf.h> +#include <bpf/bpf.h> int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec) { diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c index 3776d26db9e7..7d7fb0c9fe76 100644 --- a/tools/perf/examples/bpf/empty.c +++ b/tools/perf/examples/bpf/empty.c @@ -1,3 +1,3 @@ -#include <bpf.h> +#include <bpf/bpf.h> license(GPL); diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c index 9cd124b09392..c4481c390d23 100644 --- a/tools/perf/examples/bpf/sys_enter_openat.c +++ b/tools/perf/examples/bpf/sys_enter_openat.c @@ -14,7 +14,7 @@ * the return value. */ -#include <bpf.h> +#include <bpf/bpf.h> struct syscall_enter_openat_args { unsigned long long unused; diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h index 6e61c4bdf548..607189a315b2 100644 --- a/tools/perf/include/bpf/pid_filter.h +++ b/tools/perf/include/bpf/pid_filter.h @@ -3,7 +3,7 @@ #ifndef _PERF_BPF_PID_FILTER_ #define _PERF_BPF_PID_FILTER_ -#include <bpf.h> +#include <bpf/bpf.h> #define pid_filter(name) pid_map(name, bool) diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h index 316af5b2ff35..7ca6fa5463ee 100644 --- a/tools/perf/include/bpf/stdio.h +++ b/tools/perf/include/bpf/stdio.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <bpf.h> +#include <bpf/bpf.h> struct bpf_map SEC("maps") __bpf_stdout__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h index ca7877f9a976..d1a35b6c649d 100644 --- a/tools/perf/include/bpf/unistd.h +++ b/tools/perf/include/bpf/unistd.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: LGPL-2.1 -#include <bpf.h> +#include <bpf/bpf.h> static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 45286900aacb..0aa63aeb58ec 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -339,10 +339,10 @@ static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format) list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) #define hists__for_each_format(hists, format) \ - perf_hpp_list__for_each_format((hists)->hpp_list, fmt) + perf_hpp_list__for_each_format((hists)->hpp_list, format) #define hists__for_each_sort_list(hists, format) \ - perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt) + perf_hpp_list__for_each_sort_list((hists)->hpp_list, format) extern struct perf_hpp_fmt perf_hpp__format[]; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6658fbf196e6..1965aefccb02 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -920,6 +920,9 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, if (curr_map == NULL) return -1; + if (curr_dso->kernel) + map__kmap(curr_map)->kmaps = kmaps; + if (adjust_kernel_syms) { curr_map->start = shdr->sh_addr + ref_reloc(kmap); curr_map->end = curr_map->start + shdr->sh_size; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b001c602414b..b45a8f77ee24 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -32,6 +32,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += net/mptcp TARGETS += netfilter TARGETS += networking/timestamping TARGETS += nsfs diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 301ac12d5d69..ec464859c6b6 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -22,17 +22,13 @@ get_cgroup_id_user test_skb_cgroup_id_user test_socket_cookie test_cgroup_storage -test_select_reuseport test_flow_dissector flow_dissector_load test_netcnt -test_section_names test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user test_sysctl -libbpf.pc -libbpf.so.* test_hashmap test_btf_dump xdping @@ -41,4 +37,4 @@ test_cpp /no_alu32 /bpf_gcc /tools -bpf_helper_defs.h + diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f1f949cd8ed9..5f41f5bd8033 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -20,8 +20,8 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \ - -I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR) \ +CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ + -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread @@ -73,7 +73,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp + test_lirc_mode2_user xdping test_cpp runqslower TEST_CUSTOM_PROGS = urandom_read @@ -83,20 +83,29 @@ TEST_CUSTOM_PROGS = urandom_read # $3 - target (assumed to be file); only file name will be emitted; # $4 - optional extra arg, emitted as-is, if provided. ifeq ($(V),1) +Q = msg = else -msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4)) +Q = @ +msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; +MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 endif # override lib.mk's default rules OVERRIDE_TARGETS := 1 override define CLEAN - $(call msg, CLEAN) + $(call msg,CLEAN) $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef include ../lib.mk +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +INCLUDE_DIR := $(SCRATCH_DIR)/include +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + # Define simple and short `make test_progs`, `make test_sysctl`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static @@ -108,18 +117,25 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; $(OUTPUT)/%:%.c - $(call msg, BINARY,,$@) + $(call msg,BINARY,,$@) $(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c - $(call msg, BINARY,,$@) - $(CC) -o $@ $< -Wl,--build-id + $(call msg,BINARY,,$@) + $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id -$(OUTPUT)/test_stub.o: test_stub.c - $(call msg, CC,,$@) +$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) + $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -BPFOBJ := $(OUTPUT)/libbpf.a +VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) +$(OUTPUT)/runqslower: $(BPFOBJ) + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) @@ -137,23 +153,22 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -.PHONY: force - -# force a rebuild of BPFOBJ when its dependencies are updated -force: - -DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) +$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BUILD_DIR)/bpftool + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ + OUTPUT=$(BUILD_DIR)/bpftool/ \ + prefix= DESTDIR=$(SCRATCH_DIR)/ install -$(DEFAULT_BPFTOOL): force - $(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install - -$(BPFOBJ): force - $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ +$(BPFOBJ): $(wildcard $(BPFDIR)/*.c $(BPFDIR)/*.h $(BPFDIR)/Makefile) \ + ../../../include/uapi/linux/bpf.h \ + | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers -BPF_HELPERS := $(OUTPUT)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h) -$(OUTPUT)/bpf_helper_defs.h: - $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h +$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): + $(call msg,MKDIR,,$@) + mkdir -p $@ # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, @@ -173,8 +188,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ - -I. -I./include/uapi -I$(APIDIR) \ - -I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include) + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ + -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types @@ -190,28 +205,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $3 - CFLAGS # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE - $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE - $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE - $(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE - $(call msg, GCC-BPF,$(TRUNNER_BINARY),$2) + $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef @@ -252,6 +267,7 @@ define DEFINE_TEST_RUNNER_RULES ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): + $$(call msg,MKDIR,,$$@) mkdir -p $$@ endif @@ -262,7 +278,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ - $$(BPF_HELPERS) | $(TRUNNER_OUTPUT) + $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) @@ -270,7 +286,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) - $$(call msg, GEN-SKEL,$(TRUNNER_BINARY),$$@) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) $$(BPFTOOL) gen skeleton $$< > $$@ endif @@ -278,7 +294,7 @@ endif ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c - $$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@) + $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) $$(shell ( cd $(TRUNNER_TESTS_DIR); \ echo '/* Generated header, do not edit */'; \ ls *.c 2> /dev/null | \ @@ -294,7 +310,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) - $$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@) + $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ @@ -302,20 +318,20 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) - $$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@) + $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) - $$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) + $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ | $(TRUNNER_BINARY)-extras - $$(call msg, BINARY,,$$@) + $$(call msg,BINARY,,$$@) $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ endef @@ -328,7 +344,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := -I. -I$(OUTPUT) $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) TRUNNER_BPF_LDFLAGS := -mattr=+alu32 $(eval $(call DEFINE_TEST_RUNNER,test_progs)) @@ -367,15 +383,15 @@ verifier/tests.h: verifier/*.c echo '#endif' \ ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) - $(call msg, BINARY,,$@) + $(call msg,BINARY,,$@) $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) - $(call msg, CXX,,$@) + $(call msg,CXX,,$@) $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ - feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc \ - tools *.skel.h + feature \ + $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc) diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h new file mode 100644 index 000000000000..8f21965ffc6c --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_TCP_HELPERS_H +#define __BPF_TCP_HELPERS_H + +#include <stdbool.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_trace_helpers.h" + +#define BPF_STRUCT_OPS(name, args...) \ +SEC("struct_ops/"#name) \ +BPF_PROG(name, args) + +#define tcp_jiffies32 ((__u32)bpf_jiffies64()) + +struct sock_common { + unsigned char skc_state; +} __attribute__((preserve_access_index)); + +enum sk_pacing { + SK_PACING_NONE = 0, + SK_PACING_NEEDED = 1, + SK_PACING_FQ = 2, +}; + +struct sock { + struct sock_common __sk_common; + unsigned long sk_pacing_rate; + __u32 sk_pacing_status; /* see enum sk_pacing */ +} __attribute__((preserve_access_index)); + +struct inet_sock { + struct sock sk; +} __attribute__((preserve_access_index)); + +struct inet_connection_sock { + struct inet_sock icsk_inet; + __u8 icsk_ca_state:6, + icsk_ca_setsockopt:1, + icsk_ca_dst_locked:1; + struct { + __u8 pending; + } icsk_ack; + __u64 icsk_ca_priv[104 / sizeof(__u64)]; +} __attribute__((preserve_access_index)); + +struct tcp_sock { + struct inet_connection_sock inet_conn; + + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u8 ecn_flags; + __u32 delivered; + __u32 delivered_ce; + __u32 snd_cwnd; + __u32 snd_cwnd_cnt; + __u32 snd_cwnd_clamp; + __u32 snd_ssthresh; + __u8 syn_data:1, /* SYN includes data */ + syn_fastopen:1, /* SYN includes Fast Open option */ + syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ + syn_fastopen_ch:1, /* Active TFO re-enabling probe */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ + syn_smc:1; /* SYN includes SMC */ + __u32 max_packets_out; + __u32 lsndtime; + __u32 prior_cwnd; + __u64 tcp_mstamp; /* most recent packet received/sent */ +} __attribute__((preserve_access_index)); + +static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +static __always_inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + +static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +static __always_inline bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) < 0; +} +#define after(seq2, seq1) before(seq1, seq2) + +#define TCP_ECN_OK 1 +#define TCP_ECN_QUEUE_CWR 2 +#define TCP_ECN_DEMAND_CWR 4 +#define TCP_ECN_SEEN 8 + +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4, + ICSK_ACK_PUSHED2 = 8, + ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ +}; + +enum tcp_ca_event { + CA_EVENT_TX_START = 0, + CA_EVENT_CWND_RESTART = 1, + CA_EVENT_COMPLETE_CWR = 2, + CA_EVENT_LOSS = 3, + CA_EVENT_ECN_NO_CE = 4, + CA_EVENT_ECN_IS_CE = 5, +}; + +enum tcp_ca_state { + TCP_CA_Open = 0, + TCP_CA_Disorder = 1, + TCP_CA_CWR = 2, + TCP_CA_Recovery = 3, + TCP_CA_Loss = 4 +}; + +struct ack_sample { + __u32 pkts_acked; + __s32 rtt_us; + __u32 in_flight; +} __attribute__((preserve_access_index)); + +struct rate_sample { + __u64 prior_mstamp; /* starting timestamp for interval */ + __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + __s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + __u32 snd_interval_us; /* snd interval for delivered packets */ + __u32 rcv_interval_us; /* rcv interval for delivered packets */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + __u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ + bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ +} __attribute__((preserve_access_index)); + +#define TCP_CA_NAME_MAX 16 +#define TCP_CONG_NEEDS_ECN 0x2 + +struct tcp_congestion_ops { + char name[TCP_CA_NAME_MAX]; + __u32 flags; + + /* initialize private data (optional) */ + void (*init)(struct sock *sk); + /* cleanup private data (optional) */ + void (*release)(struct sock *sk); + + /* return slow start threshold (required) */ + __u32 (*ssthresh)(struct sock *sk); + /* do new cwnd calculation (required) */ + void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); + /* call before changing ca_state (optional) */ + void (*set_state)(struct sock *sk, __u8 new_state); + /* call when cwnd event occurs (optional) */ + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); + /* call when ack arrives (optional) */ + void (*in_ack_event)(struct sock *sk, __u32 flags); + /* new value of cwnd after loss (required) */ + __u32 (*undo_cwnd)(struct sock *sk); + /* hook for packet ack accounting (optional) */ + void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* override sysctl_tcp_min_tso_segs */ + __u32 (*min_tso_segs)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + __u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); +}; + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + +static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) +{ + __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); + + acked -= cwnd - tp->snd_cwnd; + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + + return acked; +} + +static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd < tp->snd_ssthresh; +} + +static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ + if (tcp_in_slow_start(tp)) + return tp->snd_cwnd < 2 * tp->max_packets_out; + + return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); +} + +static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) +{ + /* If credits accumulated at a higher w, apply them gently now. */ + if (tp->snd_cwnd_cnt >= w) { + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd++; + } + + tp->snd_cwnd_cnt += acked; + if (tp->snd_cwnd_cnt >= w) { + __u32 delta = tp->snd_cwnd_cnt / w; + + tp->snd_cwnd_cnt -= delta * w; + tp->snd_cwnd += delta; + } + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); +} + +#endif diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h index c76a214a53b0..c6f1354d93fb 100644 --- a/tools/testing/selftests/bpf/bpf_trace_helpers.h +++ b/tools/testing/selftests/bpf/bpf_trace_helpers.h @@ -1,58 +1,120 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #ifndef __BPF_TRACE_HELPERS_H #define __BPF_TRACE_HELPERS_H -#include "bpf_helpers.h" - -#define __BPF_MAP_0(i, m, v, ...) v -#define __BPF_MAP_1(i, m, v, t, a, ...) m(t, a, ctx[i]) -#define __BPF_MAP_2(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_1(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_3(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_2(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_4(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_3(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_5(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_4(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_6(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_5(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_7(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_6(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_8(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_7(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_9(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_8(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_10(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_9(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_11(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_10(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP_12(i, m, v, t, a, ...) m(t, a, ctx[i]), __BPF_MAP_11(i+1, m, v, __VA_ARGS__) -#define __BPF_MAP(n, ...) __BPF_MAP_##n(0, __VA_ARGS__) - -/* BPF sizeof(void *) is always 8, so no need to cast to long first - * for ptr to avoid compiler warning. +#include <bpf/bpf_helpers.h> + +#define ___bpf_concat(a, b) a ## b +#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_empty(...) \ + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast(args...) \ + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) + +/* + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and + * similar kinds of BPF programs, that accept input arguments as a single + * pointer to untyped u64 array, where each u64 can actually be a typed + * pointer or integer of different size. Instead of requring user to write + * manual casts and work with array elements by index, BPF_PROG macro + * allows user to declare a list of named and typed input arguments in the + * same syntax as for normal C function. All the casting is hidden and + * performed transparently, while user code can just assume working with + * function arguments of specified type and name. + * + * Original raw context argument is preserved as well as 'ctx' argument. + * This is useful when using BPF helpers that expect original context + * as one of the parameters (e.g., for bpf_perf_event_output()). */ -#define __BPF_CAST(t, a, ctx) (t) ctx -#define __BPF_V void -#define __BPF_N - -#define __BPF_DECL_ARGS(t, a, ctx) t a - -#define BPF_TRACE_x(x, sec_name, fname, ret_type, ...) \ -static __always_inline ret_type \ -____##fname(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ - \ -SEC(sec_name) \ -ret_type fname(__u64 *ctx) \ -{ \ - return ____##fname(__BPF_MAP(x, __BPF_CAST, __BPF_N, __VA_ARGS__));\ -} \ - \ -static __always_inline \ -ret_type ____##fname(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) - -#define BPF_TRACE_0(sec, fname, ...) BPF_TRACE_x(0, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_1(sec, fname, ...) BPF_TRACE_x(1, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_2(sec, fname, ...) BPF_TRACE_x(2, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_3(sec, fname, ...) BPF_TRACE_x(3, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_4(sec, fname, ...) BPF_TRACE_x(4, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_5(sec, fname, ...) BPF_TRACE_x(5, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_6(sec, fname, ...) BPF_TRACE_x(6, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_7(sec, fname, ...) BPF_TRACE_x(7, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_8(sec, fname, ...) BPF_TRACE_x(8, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_9(sec, fname, ...) BPF_TRACE_x(9, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_10(sec, fname, ...) BPF_TRACE_x(10, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_11(sec, fname, ...) BPF_TRACE_x(11, sec, fname, int, __VA_ARGS__) -#define BPF_TRACE_12(sec, fname, ...) BPF_TRACE_x(12, sec, fname, int, __VA_ARGS__) +#define BPF_PROG(name, args...) \ +name(unsigned long long *ctx); \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_ctx_cast(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args) + +struct pt_regs; + +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) \ + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) \ + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) \ + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) \ + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) \ + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) \ + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) +/* + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific + * low-level way of getting kprobe input arguments from struct pt_regs, and + * provides a familiar typed and named function arguments syntax and + * semantics of accessing kprobe input paremeters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + */ +#define BPF_KPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) + +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_argsN(x, args...) \ + ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) +#define ___bpf_kretprobe_args(args...) \ + ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) + +/* + * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all + * input kprobe arguments, one last extra argument has to be specified, which + * captures kprobe return value. + */ +#define BPF_KRETPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kretprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) #endif diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index ec219f84e041..a3352a64c067 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,7 +6,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> -#include <libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ static inline unsigned int bpf_num_possible_cpus(void) { diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c new file mode 100644 index 000000000000..f0a64d8ac59a --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +static void map_batch_update(int map_fd, __u32 max_entries, int *keys, + int *values) +{ + int i, err; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + for (i = 0; i < max_entries; i++) { + keys[i] = i; + values[i] = i + 1; + } + + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); +} + +static void map_batch_verify(int *visited, __u32 max_entries, + int *keys, int *values) +{ + int i; + + memset(visited, 0, max_entries * sizeof(*visited)); + for (i = 0; i < max_entries; i++) { + CHECK(keys[i] + 1 != values[i], "key/value checking", + "error: i %d key %d value %d\n", i, keys[i], values[i]); + visited[i] = 1; + } + for (i = 0; i < max_entries; i++) { + CHECK(visited[i] != 1, "visited checking", + "error: keys array at index %d missing\n", i); + } +} + +void test_array_map_batch_ops(void) +{ + struct bpf_create_map_attr xattr = { + .name = "array_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + }; + int map_fd, *keys, *values, *visited; + __u32 count, total, total_success; + const __u32 max_entries = 10; + bool nospace_err; + __u64 batch = 0; + int err, step; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + + keys = malloc(max_entries * sizeof(int)); + values = malloc(max_entries * sizeof(int)); + visited = malloc(max_entries * sizeof(int)); + CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", + strerror(errno)); + + /* populate elements to the map */ + map_batch_update(map_fd, max_entries, keys, values); + + /* test 1: lookup in a loop with various steps. */ + total_success = 0; + for (step = 1; step < max_entries; step++) { + map_batch_update(map_fd, max_entries, keys, values); + map_batch_verify(visited, max_entries, keys, values); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * sizeof(*values)); + batch = 0; + total = 0; + /* iteratively lookup/delete elements with 'step' + * elements each. + */ + count = step; + nospace_err = false; + while (true) { + err = bpf_map_lookup_batch(map_fd, + total ? &batch : NULL, &batch, + keys + total, + values + total, + &count, &opts); + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + + } + + if (nospace_err == true) + continue; + + CHECK(total != max_entries, "lookup with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + map_batch_verify(visited, max_entries, keys, values); + + total_success++; + } + + CHECK(total_success == 0, "check total_success", + "unexpected failure\n"); + + printf("%s:PASS\n", __func__); + + free(keys); + free(values); + free(visited); +} diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c new file mode 100644 index 000000000000..976bf415fbdd --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <bpf_util.h> +#include <test_maps.h> + +static void map_batch_update(int map_fd, __u32 max_entries, int *keys, + void *values, bool is_pcpu) +{ + typedef BPF_DECLARE_PERCPU(int, value); + value *v = NULL; + int i, j, err; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + if (is_pcpu) + v = (value *)values; + + for (i = 0; i < max_entries; i++) { + keys[i] = i + 1; + if (is_pcpu) + for (j = 0; j < bpf_num_possible_cpus(); j++) + bpf_percpu(v[i], j) = i + 2 + j; + else + ((int *)values)[i] = i + 2; + } + + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); +} + +static void map_batch_verify(int *visited, __u32 max_entries, + int *keys, void *values, bool is_pcpu) +{ + typedef BPF_DECLARE_PERCPU(int, value); + value *v = NULL; + int i, j; + + if (is_pcpu) + v = (value *)values; + + memset(visited, 0, max_entries * sizeof(*visited)); + for (i = 0; i < max_entries; i++) { + + if (is_pcpu) { + for (j = 0; j < bpf_num_possible_cpus(); j++) { + CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j), + "key/value checking", + "error: i %d j %d key %d value %d\n", + i, j, keys[i], bpf_percpu(v[i], j)); + } + } else { + CHECK(keys[i] + 1 != ((int *)values)[i], + "key/value checking", + "error: i %d key %d value %d\n", i, keys[i], + ((int *)values)[i]); + } + + visited[i] = 1; + + } + for (i = 0; i < max_entries; i++) { + CHECK(visited[i] != 1, "visited checking", + "error: keys array at index %d missing\n", i); + } +} + +void __test_map_lookup_and_delete_batch(bool is_pcpu) +{ + __u32 batch, count, total, total_success; + typedef BPF_DECLARE_PERCPU(int, value); + int map_fd, *keys, *visited, key; + const __u32 max_entries = 10; + value pcpu_values[max_entries]; + int err, step, value_size; + bool nospace_err; + void *values; + struct bpf_create_map_attr xattr = { + .name = "hash_map", + .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : + BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(int), + }; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + + value_size = is_pcpu ? sizeof(value) : sizeof(int); + keys = malloc(max_entries * sizeof(int)); + if (is_pcpu) + values = pcpu_values; + else + values = malloc(max_entries * sizeof(int)); + visited = malloc(max_entries * sizeof(int)); + CHECK(!keys || !values || !visited, "malloc()", + "error:%s\n", strerror(errno)); + + /* test 1: lookup/delete an empty hash table, -ENOENT */ + count = max_entries; + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, + values, &count, &opts); + CHECK((err && errno != ENOENT), "empty map", + "error: %s\n", strerror(errno)); + + /* populate elements to the map */ + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + + /* test 2: lookup/delete with count = 0, success */ + count = 0; + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, + values, &count, &opts); + CHECK(err, "count = 0", "error: %s\n", strerror(errno)); + + /* test 3: lookup/delete with count = max_entries, success */ + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * value_size); + count = max_entries; + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, + values, &count, &opts); + CHECK((err && errno != ENOENT), "count = max_entries", + "error: %s\n", strerror(errno)); + CHECK(count != max_entries, "count = max_entries", + "count = %u, max_entries = %u\n", count, max_entries); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); + + /* bpf_map_get_next_key() should return -ENOENT for an empty map. */ + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", strerror(errno)); + + /* test 4: lookup/delete in a loop with various steps. */ + total_success = 0; + for (step = 1; step < max_entries; step++) { + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * value_size); + total = 0; + /* iteratively lookup/delete elements with 'step' + * elements each + */ + count = step; + nospace_err = false; + while (true) { + err = bpf_map_lookup_batch(map_fd, + total ? &batch : NULL, + &batch, keys + total, + values + + total * value_size, + &count, &opts); + /* It is possible that we are failing due to buffer size + * not big enough. In such cases, let us just exit and + * go with large steps. Not that a buffer size with + * max_entries should always work. + */ + if (err && errno == ENOSPC) { + nospace_err = true; + break; + } + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + + } + if (nospace_err == true) + continue; + + CHECK(total != max_entries, "lookup with steps", + "total = %u, max_entries = %u\n", total, max_entries); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); + + total = 0; + count = step; + while (total < max_entries) { + if (max_entries - total < step) + count = max_entries - total; + err = bpf_map_delete_batch(map_fd, + keys + total, + &count, &opts); + CHECK((err && errno != ENOENT), "delete batch", + "error: %s\n", strerror(errno)); + total += count; + if (err) + break; + } + CHECK(total != max_entries, "delete with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + /* check map is empty, errono == ENOENT */ + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", + "error: %s\n", strerror(errno)); + + /* iteratively lookup/delete elements with 'step' + * elements each + */ + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * value_size); + total = 0; + count = step; + nospace_err = false; + while (true) { + err = bpf_map_lookup_and_delete_batch(map_fd, + total ? &batch : NULL, + &batch, keys + total, + values + + total * value_size, + &count, &opts); + /* It is possible that we are failing due to buffer size + * not big enough. In such cases, let us just exit and + * go with large steps. Not that a buffer size with + * max_entries should always work. + */ + if (err && errno == ENOSPC) { + nospace_err = true; + break; + } + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + } + + if (nospace_err == true) + continue; + + CHECK(total != max_entries, "lookup/delete with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + map_batch_verify(visited, max_entries, keys, values, is_pcpu); + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", + strerror(errno)); + + total_success++; + } + + CHECK(total_success == 0, "check total_success", + "unexpected failure\n"); + free(keys); + free(visited); + if (!is_pcpu) + free(values); +} + +void htab_map_batch_ops(void) +{ + __test_map_lookup_and_delete_batch(false); + printf("test_%s:PASS\n", __func__); +} + +void htab_percpu_map_batch_ops(void) +{ + __test_map_lookup_and_delete_batch(true); + printf("test_%s:PASS\n", __func__); +} + +void test_htab_map_batch_ops(void) +{ + htab_map_batch_ops(); + htab_percpu_map_batch_ops(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c new file mode 100644 index 000000000000..8482bbc67eec --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <linux/err.h> +#include <test_progs.h> +#include "bpf_dctcp.skel.h" +#include "bpf_cubic.skel.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static const unsigned int total_bytes = 10 * 1024 * 1024; +static const struct timeval timeo_sec = { .tv_sec = 10 }; +static const size_t timeo_optlen = sizeof(timeo_sec); +static int stop, duration; + +static int settimeo(int fd) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, + timeo_optlen); + if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n", + errno)) + return -1; + + err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec, + timeo_optlen); + if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n", + errno)) + return -1; + + return 0; +} + +static int settcpca(int fd, const char *tcp_ca) +{ + int err; + + err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); + if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", + errno)) + return -1; + + return 0; +} + +static void *server(void *arg) +{ + int lfd = (int)(long)arg, err = 0, fd; + ssize_t nr_sent = 0, bytes = 0; + char batch[1500]; + + fd = accept(lfd, NULL, NULL); + while (fd == -1) { + if (errno == EINTR) + continue; + err = -errno; + goto done; + } + + if (settimeo(fd)) { + err = -errno; + goto done; + } + + while (bytes < total_bytes && !READ_ONCE(stop)) { + nr_sent = send(fd, &batch, + min(total_bytes - bytes, sizeof(batch)), 0); + if (nr_sent == -1 && errno == EINTR) + continue; + if (nr_sent == -1) { + err = -errno; + break; + } + bytes += nr_sent; + } + + CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", + bytes, total_bytes, nr_sent, errno); + +done: + if (fd != -1) + close(fd); + if (err) { + WRITE_ONCE(stop, 1); + return ERR_PTR(err); + } + return NULL; +} + +static void do_test(const char *tcp_ca) +{ + struct sockaddr_in6 sa6 = {}; + ssize_t nr_recv = 0, bytes = 0; + int lfd = -1, fd = -1; + pthread_t srv_thread; + socklen_t addrlen = sizeof(sa6); + void *thread_ret; + char batch[1500]; + int err; + + WRITE_ONCE(stop, 0); + + lfd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + return; + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + close(lfd); + return; + } + + if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) || + settimeo(lfd) || settimeo(fd)) + goto done; + + /* bind, listen and start server thread to accept */ + sa6.sin6_family = AF_INET6; + sa6.sin6_addr = in6addr_loopback; + err = bind(lfd, (struct sockaddr *)&sa6, addrlen); + if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); + if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + goto done; + err = listen(lfd, 1); + if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + goto done; + err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); + if (CHECK(err != 0, "pthread_create", "err:%d\n", err)) + goto done; + + /* connect to server */ + err = connect(fd, (struct sockaddr *)&sa6, addrlen); + if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + goto wait_thread; + + /* recv total_bytes */ + while (bytes < total_bytes && !READ_ONCE(stop)) { + nr_recv = recv(fd, &batch, + min(total_bytes - bytes, sizeof(batch)), 0); + if (nr_recv == -1 && errno == EINTR) + continue; + if (nr_recv == -1) + break; + bytes += nr_recv; + } + + CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", + bytes, total_bytes, nr_recv, errno); + +wait_thread: + WRITE_ONCE(stop, 1); + pthread_join(srv_thread, &thread_ret); + CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", + PTR_ERR(thread_ret)); +done: + close(lfd); + close(fd); +} + +static void test_cubic(void) +{ + struct bpf_cubic *cubic_skel; + struct bpf_link *link; + + cubic_skel = bpf_cubic__open_and_load(); + if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + return; + + link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); + if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", + PTR_ERR(link))) { + bpf_cubic__destroy(cubic_skel); + return; + } + + do_test("bpf_cubic"); + + bpf_link__destroy(link); + bpf_cubic__destroy(cubic_skel); +} + +static void test_dctcp(void) +{ + struct bpf_dctcp *dctcp_skel; + struct bpf_link *link; + + dctcp_skel = bpf_dctcp__open_and_load(); + if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + return; + + link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); + if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", + PTR_ERR(link))) { + bpf_dctcp__destroy(dctcp_skel); + return; + } + + do_test("bpf_dctcp"); + + bpf_link__destroy(link); + bpf_dctcp__destroy(dctcp_skel); +} + +void test_bpf_tcp_ca(void) +{ + if (test__start_subtest("dctcp")) + test_dctcp(); + if (test__start_subtest("cubic")) + test_cubic(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 9486c13af6b2..e9f2f12ba06b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -48,6 +48,8 @@ void test_bpf_verif_scale(void) { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, + { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + /* full unroll by llvm */ { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, diff --git a/tools/testing/selftests/bpf/prog_tests/cpu_mask.c b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c index 1fa1bdbaffa9..f7c7e25232be 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpu_mask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include <bpf/btf.h> -#include "libbpf_internal.h" +#include "bpf/libbpf_internal.h" static int duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index b426bf2f97e4..db5c74d2ce6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -26,7 +26,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, link = calloc(sizeof(struct bpf_link *), prog_cnt); prog = calloc(sizeof(struct bpf_program *), prog_cnt); - result = malloc(prog_cnt * sizeof(u64)); + result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64)); if (CHECK(!link || !prog || !result, "alloc_memory", "failed to alloc memory")) goto close_prog; @@ -98,6 +98,24 @@ static void test_target_yes_callees(void) "fexit/test_pkt_access", "fexit/test_pkt_access_subprog1", "fexit/test_pkt_access_subprog2", + "fexit/test_pkt_access_subprog3", + }; + test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", + "./test_pkt_access.o", + ARRAY_SIZE(prog_name), + prog_name); +} + +static void test_func_replace(void) +{ + const char *prog_name[] = { + "fexit/test_pkt_access", + "fexit/test_pkt_access_subprog1", + "fexit/test_pkt_access_subprog2", + "fexit/test_pkt_access_subprog3", + "freplace/get_skb_len", + "freplace/get_skb_ifindex", + "freplace/get_constant", }; test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", @@ -109,4 +127,5 @@ void test_fexit_bpf2bpf(void) { test_target_no_callees(); test_target_yes_callees(); + test_func_replace(); } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index cf6c87936c69..1450ea2dd4cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -4,7 +4,7 @@ #include <sched.h> #include <sys/socket.h> #include <test_progs.h> -#include "libbpf_internal.h" +#include "bpf/libbpf_internal.h" static void on_sample(void *ctx, int cpu, void *data, __u32 size) { diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index b607112c64e7..504abb7bfb95 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_send_signal_kern.skel.h" static volatile int sigusr1_received = 0; @@ -9,17 +10,15 @@ static void sigusr1_handler(int signum) } static void test_send_signal_common(struct perf_event_attr *attr, - int prog_type, + bool signal_thread, const char *test_name) { - int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd; - const char *file = "./test_send_signal_kern.o"; - struct bpf_object *obj = NULL; + struct test_send_signal_kern *skel; int pipe_c2p[2], pipe_p2c[2]; - __u32 key = 0, duration = 0; + int err = -1, pmu_fd = -1; + __u32 duration = 0; char buf[256]; pid_t pid; - __u64 val; if (CHECK(pipe(pipe_c2p), test_name, "pipe pipe_c2p error: %s\n", strerror(errno))) @@ -73,45 +72,39 @@ static void test_send_signal_common(struct perf_event_attr *attr, close(pipe_c2p[1]); /* close write */ close(pipe_p2c[0]); /* close read */ - err = bpf_prog_load(file, prog_type, &obj, &prog_fd); - if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n", - strerror(errno))) - goto prog_load_failure; - - pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, - -1 /* group id */, 0 /* flags */); - if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", - strerror(errno))) { - err = -1; - goto close_prog; - } + skel = test_send_signal_kern__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n")) + goto skel_open_load_failure; - err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n", - strerror(errno))) - goto disable_pmu; - - err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); - if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n", - strerror(errno))) - goto disable_pmu; - - err = -1; - info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map"); - if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map")) - goto disable_pmu; + if (!attr) { + err = test_send_signal_kern__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) { + err = -1; + goto destroy_skel; + } + } else { + pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, + -1 /* group id */, 0 /* flags */); + if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", + strerror(errno))) { + err = -1; + goto destroy_skel; + } - status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map"); - if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map")) - goto disable_pmu; + skel->links.send_signal_perf = + bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd); + if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event", + "err %ld\n", PTR_ERR(skel->links.send_signal_perf))) + goto disable_pmu; + } /* wait until child signal handler installed */ read(pipe_c2p[0], buf, 1); /* trigger the bpf send_signal */ - key = 0; - val = (((__u64)(SIGUSR1)) << 32) | pid; - bpf_map_update_elem(info_map_fd, &key, &val, 0); + skel->bss->pid = pid; + skel->bss->sig = SIGUSR1; + skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ write(pipe_p2c[1], buf, 1); @@ -132,46 +125,20 @@ static void test_send_signal_common(struct perf_event_attr *attr, disable_pmu: close(pmu_fd); -close_prog: - bpf_object__close(obj); -prog_load_failure: +destroy_skel: + test_send_signal_kern__destroy(skel); +skel_open_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); wait(NULL); } -static void test_send_signal_tracepoint(void) +static void test_send_signal_tracepoint(bool signal_thread) { - const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id"; - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN, - .sample_period = 1, - .wakeup_events = 1, - }; - __u32 duration = 0; - int bytes, efd; - char buf[256]; - - efd = open(id_path, O_RDONLY, 0); - if (CHECK(efd < 0, "tracepoint", - "open syscalls/sys_enter_nanosleep/id failure: %s\n", - strerror(errno))) - return; - - bytes = read(efd, buf, sizeof(buf)); - close(efd); - if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint", - "read syscalls/sys_enter_nanosleep/id failure: %s\n", - strerror(errno))) - return; - - attr.config = strtol(buf, NULL, 0); - - test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); + test_send_signal_common(NULL, signal_thread, "tracepoint"); } -static void test_send_signal_perf(void) +static void test_send_signal_perf(bool signal_thread) { struct perf_event_attr attr = { .sample_period = 1, @@ -179,15 +146,13 @@ static void test_send_signal_perf(void) .config = PERF_COUNT_SW_CPU_CLOCK, }; - test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, - "perf_sw_event"); + test_send_signal_common(&attr, signal_thread, "perf_sw_event"); } -static void test_send_signal_nmi(void) +static void test_send_signal_nmi(bool signal_thread) { struct perf_event_attr attr = { - .sample_freq = 50, - .freq = 1, + .sample_period = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; @@ -210,16 +175,21 @@ static void test_send_signal_nmi(void) close(pmu_fd); } - test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, - "perf_hw_event"); + test_send_signal_common(&attr, signal_thread, "perf_hw_event"); } void test_send_signal(void) { if (test__start_subtest("send_signal_tracepoint")) - test_send_signal_tracepoint(); + test_send_signal_tracepoint(false); if (test__start_subtest("send_signal_perf")) - test_send_signal_perf(); + test_send_signal_perf(false); if (test__start_subtest("send_signal_nmi")) - test_send_signal_nmi(); + test_send_signal_nmi(false); + if (test__start_subtest("send_signal_tracepoint_thread")) + test_send_signal_tracepoint(true); + if (test__start_subtest("send_signal_perf_thread")) + test_send_signal_perf(true); + if (test__start_subtest("send_signal_nmi_thread")) + test_send_signal_nmi(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 8974450a4bdb..f002e3090d92 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -49,8 +49,12 @@ retry: pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, -1 /* group id */, 0 /* flags */); - if (CHECK(pmu_fd < 0, "perf_event_open", - "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", + if (pmu_fd < 0 && errno == ENOENT) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, errno)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c new file mode 100644 index 000000000000..25b068591e9a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> + +const char *err_str; +bool found; + +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + char *log_buf; + + if (level != LIBBPF_WARN || + strcmp(format, "libbpf: \n%s\n")) { + vprintf(format, args); + return 0; + } + + log_buf = va_arg(args, char *); + if (!log_buf) + goto out; + if (strstr(log_buf, err_str) == 0) + found = true; +out: + printf(format, log_buf); + return 0; +} + +extern int extra_prog_load_log_flags; + +static int check_load(const char *file) +{ + struct bpf_prog_load_attr attr; + struct bpf_object *obj = NULL; + int err, prog_fd; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = BPF_PROG_TYPE_UNSPEC; + attr.log_level = extra_prog_load_log_flags; + attr.prog_flags = BPF_F_TEST_RND_HI32; + found = false; + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + bpf_object__close(obj); + return err; +} + +struct test_def { + const char *file; + const char *err_str; +}; + +void test_test_global_funcs(void) +{ + struct test_def tests[] = { + { "test_global_func1.o", "combined stack size of 4 calls is 544" }, + { "test_global_func2.o" }, + { "test_global_func3.o" , "the call stack of 8 frames" }, + { "test_global_func4.o" }, + { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, + { "test_global_func6.o" , "modified ctx ptr R2" }, + { "test_global_func7.o" , "foo() doesn't return scalar" }, + }; + libbpf_print_fn_t old_print_fn = NULL; + int err, i, duration = 0; + + old_print_fn = libbpf_set_print(libbpf_debug_print); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + const struct test_def *test = &tests[i]; + + if (!test__start_subtest(test->file)) + continue; + + err_str = test->err_str; + err = check_load(test->file); + CHECK_FAIL(!!err ^ !!err_str); + if (err_str) + CHECK(found, "", "expected string '%s'", err_str); + } + libbpf_set_print(old_print_fn); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index c32aa28bd93f..465b371a561d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Facebook */ #define _GNU_SOURCE #include <sched.h> +#include <sys/prctl.h> #include <test_progs.h> #define MAX_CNT 100000 @@ -17,7 +18,7 @@ static __u64 time_get_ns(void) static int test_task_rename(const char *prog) { int i, fd, duration = 0, err; - char buf[] = "test\n"; + char buf[] = "test_overhead"; __u64 start_time; fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); @@ -66,6 +67,10 @@ void test_test_overhead(void) struct bpf_object *obj; struct bpf_link *link; int err, duration = 0; + char comm[16] = {}; + + if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) + return; obj = bpf_object__open_file("./test_overhead.o", NULL); if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) @@ -138,5 +143,6 @@ void test_test_overhead(void) test_run("fexit"); bpf_link__destroy(link); cleanup: + prctl(PR_SET_NAME, comm, 0L, 0L, 0L); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c new file mode 100644 index 000000000000..6b56bdc73ebc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <net/if.h> +#include "test_xdp.skel.h" +#include "test_xdp_bpf2bpf.skel.h" + +void test_xdp_bpf2bpf(void) +{ + __u32 duration = 0, retval, size; + char buf[128]; + int err, pkt_fd, map_fd; + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + struct iptnl_info value4 = {.family = AF_INET}; + struct test_xdp *pkt_skel = NULL; + struct test_xdp_bpf2bpf *ftrace_skel = NULL; + struct vip key4 = {.protocol = 6, .family = AF_INET}; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + + /* Load XDP program to introspect */ + pkt_skel = test_xdp__open_and_load(); + if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n")) + return; + + pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel); + + map_fd = bpf_map__fd(pkt_skel->maps.vip2tnl); + bpf_map_update_elem(map_fd, &key4, &value4, 0); + + /* Load trace program */ + opts.attach_prog_fd = pkt_fd, + ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts); + if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) + goto out; + + err = test_xdp_bpf2bpf__load(ftrace_skel); + if (CHECK(err, "__load", "ftrace skeleton failed\n")) + goto out; + + err = test_xdp_bpf2bpf__attach(ftrace_skel); + if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) + goto out; + + /* Run test program */ + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + if (CHECK(err || retval != XDP_TX || size != 74 || + iph->protocol != IPPROTO_IPIP, "ipv4", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size)) + goto out; + + /* Verify test results */ + if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), + "result", "fentry failed err %llu\n", + ftrace_skel->bss->test_result_fentry)) + goto out; + + CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result", + "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); + +out: + test_xdp__destroy(pkt_skel); + test_xdp_bpf2bpf__destroy(ftrace_skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c new file mode 100644 index 000000000000..7897c8f4d363 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* WARNING: This implemenation is not necessarily the same + * as the tcp_cubic.c. The purpose is mainly for testing + * the kernel BPF logic. + * + * Highlights: + * 1. CONFIG_HZ .kconfig map is used. + * 2. In bictcp_update(), calculation is changed to use usec + * resolution (i.e. USEC_PER_JIFFY) instead of using jiffies. + * Thus, usecs_to_jiffies() is not used in the bpf_cubic.c. + * 3. In bitctcp_update() [under tcp_friendliness], the original + * "while (ca->ack_cnt > delta)" loop is changed to the equivalent + * "ca->ack_cnt / delta" operation. + */ + +#include <linux/bpf.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + +#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation + * max_cwnd = snd_cwnd * beta + */ +#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ + +/* Two methods of hybrid slow start */ +#define HYSTART_ACK_TRAIN 0x1 +#define HYSTART_DELAY 0x2 + +/* Number of delay samples for detecting the increase of delay */ +#define HYSTART_MIN_SAMPLES 8 +#define HYSTART_DELAY_MIN (4000U) /* 4ms */ +#define HYSTART_DELAY_MAX (16000U) /* 16 ms */ +#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) + +static int fast_convergence = 1; +static const int beta = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ +static int initial_ssthresh; +static const int bic_scale = 41; +static int tcp_friendliness = 1; + +static int hystart = 1; +static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY; +static int hystart_low_window = 16; +static int hystart_ack_delta_us = 2000; + +static const __u32 cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ +static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 + / (BICTCP_BETA_SCALE - beta); +/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 + * so K = cubic_root( (wmax-cwnd)*rtt/c ) + * the unit of K is bictcp_HZ=2^10, not HZ + * + * c = bic_scale >> 10 + * rtt = 100ms + * + * the following code has been designed and tested for + * cwnd < 1 million packets + * RTT < 100 seconds + * HZ < 1,000,00 (corresponding to 10 nano-second) + */ + +/* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */ +static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ)) + / (bic_scale * 10); + +/* BIC TCP Parameters */ +struct bictcp { + __u32 cnt; /* increase cwnd by 1 after ACKs */ + __u32 last_max_cwnd; /* last maximum snd_cwnd */ + __u32 last_cwnd; /* the last snd_cwnd */ + __u32 last_time; /* time when updated last_cwnd */ + __u32 bic_origin_point;/* origin point of bic function */ + __u32 bic_K; /* time to origin point + from the beginning of the current epoch */ + __u32 delay_min; /* min delay (usec) */ + __u32 epoch_start; /* beginning of an epoch */ + __u32 ack_cnt; /* number of acks */ + __u32 tcp_cwnd; /* estimated tcp cwnd */ + __u16 unused; + __u8 sample_cnt; /* number of samples to decide curr_rtt */ + __u8 found; /* the exit point is found? */ + __u32 round_start; /* beginning of each round */ + __u32 end_seq; /* end_seq of the round */ + __u32 last_ack; /* last time when the ACK spacing is close */ + __u32 curr_rtt; /* the minimum rtt of current round */ +}; + +static inline void bictcp_reset(struct bictcp *ca) +{ + ca->cnt = 0; + ca->last_max_cwnd = 0; + ca->last_cwnd = 0; + ca->last_time = 0; + ca->bic_origin_point = 0; + ca->bic_K = 0; + ca->delay_min = 0; + ca->epoch_start = 0; + ca->ack_cnt = 0; + ca->tcp_cwnd = 0; + ca->found = 0; +} + +extern unsigned long CONFIG_HZ __kconfig; +#define HZ CONFIG_HZ +#define USEC_PER_MSEC 1000UL +#define USEC_PER_SEC 1000000UL +#define USEC_PER_JIFFY (USEC_PER_SEC / HZ) + +static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) +{ + return dividend / divisor; +} + +#define div64_ul div64_u64 + +#define BITS_PER_U64 (sizeof(__u64) * 8) +static __always_inline int fls64(__u64 x) +{ + int num = BITS_PER_U64 - 1; + + if (x == 0) + return 0; + + if (!(x & (~0ull << (BITS_PER_U64-32)))) { + num -= 32; + x <<= 32; + } + if (!(x & (~0ull << (BITS_PER_U64-16)))) { + num -= 16; + x <<= 16; + } + if (!(x & (~0ull << (BITS_PER_U64-8)))) { + num -= 8; + x <<= 8; + } + if (!(x & (~0ull << (BITS_PER_U64-4)))) { + num -= 4; + x <<= 4; + } + if (!(x & (~0ull << (BITS_PER_U64-2)))) { + num -= 2; + x <<= 2; + } + if (!(x & (~0ull << (BITS_PER_U64-1)))) + num -= 1; + + return num + 1; +} + +static __always_inline __u32 bictcp_clock_us(const struct sock *sk) +{ + return tcp_sk(sk)->tcp_mstamp; +} + +static __always_inline void bictcp_hystart_reset(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->round_start = ca->last_ack = bictcp_clock_us(sk); + ca->end_seq = tp->snd_nxt; + ca->curr_rtt = ~0U; + ca->sample_cnt = 0; +} + +/* "struct_ops/" prefix is not a requirement + * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS + * as long as it is used in one of the func ptr + * under SEC(".struct_ops"). + */ +SEC("struct_ops/bictcp_init") +void BPF_PROG(bictcp_init, struct sock *sk) +{ + struct bictcp *ca = inet_csk_ca(sk); + + bictcp_reset(ca); + + if (hystart) + bictcp_hystart_reset(sk); + + if (!hystart && initial_ssthresh) + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; +} + +/* No prefix in SEC will also work. + * The remaining tcp-cubic functions have an easier way. + */ +SEC("no-sec-prefix-bictcp_cwnd_event") +void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event) +{ + if (event == CA_EVENT_TX_START) { + struct bictcp *ca = inet_csk_ca(sk); + __u32 now = tcp_jiffies32; + __s32 delta; + + delta = now - tcp_sk(sk)->lsndtime; + + /* We were application limited (idle) for a while. + * Shift epoch_start to keep cwnd growth to cubic curve. + */ + if (ca->epoch_start && delta > 0) { + ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; + } + return; + } +} + +/* + * cbrt(x) MSB values for x MSB values in [0..63]. + * Precomputed then refined by hand - Willy Tarreau + * + * For x in [0..63], + * v = cbrt(x << 18) - 1 + * cbrt(x) = (v[x] + 10) >> 6 + */ +static const __u8 v[] = { + /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, + /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, + /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, + /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, + /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, + /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, + /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, + /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, +}; + +/* calculate the cubic root of x using a table lookup followed by one + * Newton-Raphson iteration. + * Avg err ~= 0.195% + */ +static __always_inline __u32 cubic_root(__u64 a) +{ + __u32 x, b, shift; + + if (a < 64) { + /* a in [0..63] */ + return ((__u32)v[(__u32)a] + 35) >> 6; + } + + b = fls64(a); + b = ((b * 84) >> 8) - 1; + shift = (a >> (b * 3)); + + /* it is needed for verifier's bound check on v */ + if (shift >= 64) + return 0; + + x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6; + + /* + * Newton-Raphson iteration + * 2 + * x = ( 2 * x + a / x ) / 3 + * k+1 k k + */ + x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1))); + x = ((x * 341) >> 10); + return x; +} + +/* + * Compute congestion window to use. + */ +static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd, + __u32 acked) +{ + __u32 delta, bic_target, max_cnt; + __u64 offs, t; + + ca->ack_cnt += acked; /* count the number of ACKed packets */ + + if (ca->last_cwnd == cwnd && + (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) + return; + + /* The CUBIC function can update ca->cnt at most once per jiffy. + * On all cwnd reduction events, ca->epoch_start is set to 0, + * which will force a recalculation of ca->cnt. + */ + if (ca->epoch_start && tcp_jiffies32 == ca->last_time) + goto tcp_friendliness; + + ca->last_cwnd = cwnd; + ca->last_time = tcp_jiffies32; + + if (ca->epoch_start == 0) { + ca->epoch_start = tcp_jiffies32; /* record beginning */ + ca->ack_cnt = acked; /* start counting */ + ca->tcp_cwnd = cwnd; /* syn with cubic */ + + if (ca->last_max_cwnd <= cwnd) { + ca->bic_K = 0; + ca->bic_origin_point = cwnd; + } else { + /* Compute new K based on + * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) + */ + ca->bic_K = cubic_root(cube_factor + * (ca->last_max_cwnd - cwnd)); + ca->bic_origin_point = ca->last_max_cwnd; + } + } + + /* cubic function - calc*/ + /* calculate c * time^3 / rtt, + * while considering overflow in calculation of time^3 + * (so time^3 is done by using 64 bit) + * and without the support of division of 64bit numbers + * (so all divisions are done by using 32 bit) + * also NOTE the unit of those veriables + * time = (t - K) / 2^bictcp_HZ + * c = bic_scale >> 10 + * rtt = (srtt >> 3) / HZ + * !!! The following code does not have overflow problems, + * if the cwnd < 1 million packets !!! + */ + + t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY; + t += ca->delay_min; + /* change the unit from usec to bictcp_HZ */ + t <<= BICTCP_HZ; + t /= USEC_PER_SEC; + + if (t < ca->bic_K) /* t - K */ + offs = ca->bic_K - t; + else + offs = t - ca->bic_K; + + /* c/rtt * (t-K)^3 */ + delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); + if (t < ca->bic_K) /* below origin*/ + bic_target = ca->bic_origin_point - delta; + else /* above origin*/ + bic_target = ca->bic_origin_point + delta; + + /* cubic function - calc bictcp_cnt*/ + if (bic_target > cwnd) { + ca->cnt = cwnd / (bic_target - cwnd); + } else { + ca->cnt = 100 * cwnd; /* very small increment*/ + } + + /* + * The initial growth of cubic function may be too conservative + * when the available bandwidth is still unknown. + */ + if (ca->last_max_cwnd == 0 && ca->cnt > 20) + ca->cnt = 20; /* increase cwnd 5% per RTT */ + +tcp_friendliness: + /* TCP Friendly */ + if (tcp_friendliness) { + __u32 scale = beta_scale; + __u32 n; + + /* update tcp cwnd */ + delta = (cwnd * scale) >> 3; + if (ca->ack_cnt > delta && delta) { + n = ca->ack_cnt / delta; + ca->ack_cnt -= n * delta; + ca->tcp_cwnd += n; + } + + if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ + delta = ca->tcp_cwnd - cwnd; + max_cnt = cwnd / delta; + if (ca->cnt > max_cnt) + ca->cnt = max_cnt; + } + } + + /* The maximum rate of cwnd increase CUBIC allows is 1 packet per + * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. + */ + ca->cnt = max(ca->cnt, 2U); +} + +/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ +void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + if (!tcp_is_cwnd_limited(sk)) + return; + + if (tcp_in_slow_start(tp)) { + if (hystart && after(ack, ca->end_seq)) + bictcp_hystart_reset(sk); + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + bictcp_update(ca, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, ca->cnt, acked); +} + +__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->epoch_start = 0; /* end of epoch */ + + /* Wmax and fast convergence */ + if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + / (2 * BICTCP_BETA_SCALE); + else + ca->last_max_cwnd = tp->snd_cwnd; + + return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); +} + +void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state) +{ + if (new_state == TCP_CA_Loss) { + bictcp_reset(inet_csk_ca(sk)); + bictcp_hystart_reset(sk); + } +} + +#define GSO_MAX_SIZE 65536 + +/* Account for TSO/GRO delays. + * Otherwise short RTT flows could get too small ssthresh, since during + * slow start we begin with small TSO packets and ca->delay_min would + * not account for long aggregation delay when TSO packets get bigger. + * Ideally even with a very small RTT we would like to have at least one + * TSO packet being sent and received by GRO, and another one in qdisc layer. + * We apply another 100% factor because @rate is doubled at this point. + * We cap the cushion to 1ms. + */ +static __always_inline __u32 hystart_ack_delay(struct sock *sk) +{ + unsigned long rate; + + rate = sk->sk_pacing_rate; + if (!rate) + return 0; + return min((__u64)USEC_PER_MSEC, + div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); +} + +static __always_inline void hystart_update(struct sock *sk, __u32 delay) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + __u32 threshold; + + if (hystart_detect & HYSTART_ACK_TRAIN) { + __u32 now = bictcp_clock_us(sk); + + /* first detection parameter - ack-train detection */ + if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) { + ca->last_ack = now; + + threshold = ca->delay_min + hystart_ack_delay(sk); + + /* Hystart ack train triggers if we get ack past + * ca->delay_min/2. + * Pacing might have delayed packets up to RTT/2 + * during slow start. + */ + if (sk->sk_pacing_status == SK_PACING_NONE) + threshold >>= 1; + + if ((__s32)(now - ca->round_start) > threshold) { + ca->found = 1; + tp->snd_ssthresh = tp->snd_cwnd; + } + } + } + + if (hystart_detect & HYSTART_DELAY) { + /* obtain the minimum delay of more than sampling packets */ + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { + if (ca->curr_rtt > delay) + ca->curr_rtt = delay; + + ca->sample_cnt++; + } else { + if (ca->curr_rtt > ca->delay_min + + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { + ca->found = 1; + tp->snd_ssthresh = tp->snd_cwnd; + } + } + } +} + +void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk, + const struct ack_sample *sample) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + __u32 delay; + + /* Some calls are for duplicates without timetamps */ + if (sample->rtt_us < 0) + return; + + /* Discard delay samples right after fast recovery */ + if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ) + return; + + delay = sample->rtt_us; + if (delay == 0) + delay = 1; + + /* first time call or link delay decreases */ + if (ca->delay_min == 0 || ca->delay_min > delay) + ca->delay_min = delay; + + /* hystart triggers when cwnd is larger than some threshold */ + if (!ca->found && tcp_in_slow_start(tp) && hystart && + tp->snd_cwnd >= hystart_low_window) + hystart_update(sk, delay); +} + +__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return max(tp->snd_cwnd, tp->prior_cwnd); +} + +SEC(".struct_ops") +struct tcp_congestion_ops cubic = { + .init = (void *)bictcp_init, + .ssthresh = (void *)bictcp_recalc_ssthresh, + .cong_avoid = (void *)bictcp_cong_avoid, + .set_state = (void *)bictcp_state, + .undo_cwnd = (void *)tcp_reno_undo_cwnd, + .cwnd_event = (void *)bictcp_cwnd_event, + .pkts_acked = (void *)bictcp_acked, + .name = "bpf_cubic", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c new file mode 100644 index 000000000000..b631fb5032d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +/* WARNING: This implemenation is not necessarily the same + * as the tcp_dctcp.c. The purpose is mainly for testing + * the kernel BPF logic. + */ + +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +#define DCTCP_MAX_ALPHA 1024U + +struct dctcp { + __u32 old_delivered; + __u32 old_delivered_ce; + __u32 prior_rcv_nxt; + __u32 dctcp_alpha; + __u32 next_seq; + __u32 ce_state; + __u32 loss_cwnd; +}; + +static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */ +static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA; + +static __always_inline void dctcp_reset(const struct tcp_sock *tp, + struct dctcp *ca) +{ + ca->next_seq = tp->snd_nxt; + + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; +} + +SEC("struct_ops/dctcp_init") +void BPF_PROG(dctcp_init, struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct dctcp *ca = inet_csk_ca(sk); + + ca->prior_rcv_nxt = tp->rcv_nxt; + ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); + ca->loss_cwnd = 0; + ca->ce_state = 0; + + dctcp_reset(tp, ca); +} + +SEC("struct_ops/dctcp_ssthresh") +__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ca->loss_cwnd = tp->snd_cwnd; + return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); +} + +SEC("struct_ops/dctcp_update_alpha") +void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct dctcp *ca = inet_csk_ca(sk); + + /* Expired RTT */ + if (!before(tp->snd_una, ca->next_seq)) { + __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; + __u32 alpha = ca->dctcp_alpha; + + /* alpha = (1 - g) * alpha + g * F */ + + alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); + if (delivered_ce) { + __u32 delivered = tp->delivered - ca->old_delivered; + + /* If dctcp_shift_g == 1, a 32bit value would overflow + * after 8 M packets. + */ + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); + + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); + } + ca->dctcp_alpha = alpha; + dctcp_reset(tp, ca); + } +} + +static __always_inline void dctcp_react_to_loss(struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +SEC("struct_ops/dctcp_state") +void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ +} + +static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (ce_state == 1) + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + else + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +/* Minimal DCTP CE state machine: + * + * S: 0 <- last pkt was non-CE + * 1 <- last pkt was CE + */ +static __always_inline +void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, + __u32 *prior_rcv_nxt, __u32 *ce_state) +{ + __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0; + + if (*ce_state != new_ce_state) { + /* CE state has changed, force an immediate ACK to + * reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. + */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { + dctcp_ece_ack_cwr(sk, *ce_state); + bpf_tcp_send_ack(sk, *prior_rcv_nxt); + } + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } + *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt; + *ce_state = new_ce_state; + dctcp_ece_ack_cwr(sk, new_ce_state); +} + +SEC("struct_ops/dctcp_cwnd_event") +void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) +{ + struct dctcp *ca = inet_csk_ca(sk); + + switch (ev) { + case CA_EVENT_ECN_IS_CE: + case CA_EVENT_ECN_NO_CE: + dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); + break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; + default: + /* Don't care for the rest. */ + break; + } +} + +SEC("struct_ops/dctcp_cwnd_undo") +__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + +SEC("struct_ops/tcp_reno_cong_avoid") +void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!tcp_is_cwnd_limited(sk)) + return; + + /* In "safe" area, increase. */ + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + /* In dangerous area, increase slowly. */ + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); +} + +SEC(".struct_ops") +struct tcp_congestion_ops dctcp_nouse = { + .init = (void *)dctcp_init, + .set_state = (void *)dctcp_state, + .flags = TCP_CONG_NEEDS_ECN, + .name = "bpf_dctcp_nouse", +}; + +SEC(".struct_ops") +struct tcp_congestion_ops dctcp = { + .init = (void *)dctcp_init, + .in_ack_event = (void *)dctcp_update_alpha, + .cwnd_event = (void *)dctcp_cwnd_event, + .ssthresh = (void *)dctcp_ssthresh, + .cong_avoid = (void *)tcp_reno_cong_avoid, + .undo_cwnd = (void *)dctcp_cwnd_undo, + .set_state = (void *)dctcp_state, + .flags = TCP_CONG_NEEDS_ECN, + .name = "bpf_dctcp", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 040a44206f29..9941f0ba471e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -16,8 +16,8 @@ #include <sys/socket.h> #include <linux/if_tunnel.h> #include <linux/mpls.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; #define PROG(F) SEC(#F) int bpf_func_##F diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 1fd244d35ba9..75085119c5bb 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -9,8 +9,8 @@ #include <linux/in6.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC_REWRITE_IP4 0x7f000004U #define DST_REWRITE_IP4 0x7f000001U diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 26397ab7b3c7..506d0f81a375 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -9,8 +9,8 @@ #include <linux/in6.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC_REWRITE_IP6_0 0 #define SRC_REWRITE_IP6_1 0 diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index ce41a3475f27..8924e06bdef0 100644 --- a/tools/testing/selftests/bpf/progs/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -7,7 +7,7 @@ #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("cgroup/dev") int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c index 615f7c6bca77..38d3a82144ca 100644 --- a/tools/testing/selftests/bpf/progs/fentry_test.c +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -1,43 +1,46 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "bpf_trace_helpers.h" char _license[] SEC("license") = "GPL"; __u64 test1_result = 0; -BPF_TRACE_1("fentry/bpf_fentry_test1", test1, int, a) +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) { test1_result = a == 1; return 0; } __u64 test2_result = 0; -BPF_TRACE_2("fentry/bpf_fentry_test2", test2, int, a, __u64, b) +SEC("fentry/bpf_fentry_test2") +int BPF_PROG(test2, int a, __u64 b) { test2_result = a == 2 && b == 3; return 0; } __u64 test3_result = 0; -BPF_TRACE_3("fentry/bpf_fentry_test3", test3, char, a, int, b, __u64, c) +SEC("fentry/bpf_fentry_test3") +int BPF_PROG(test3, char a, int b, __u64 c) { test3_result = a == 4 && b == 5 && c == 6; return 0; } __u64 test4_result = 0; -BPF_TRACE_4("fentry/bpf_fentry_test4", test4, - void *, a, char, b, int, c, __u64, d) +SEC("fentry/bpf_fentry_test4") +int BPF_PROG(test4, void *a, char b, int c, __u64 d) { test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10; return 0; } __u64 test5_result = 0; -BPF_TRACE_5("fentry/bpf_fentry_test5", test5, - __u64, a, void *, b, short, c, int, d, __u64, e) +SEC("fentry/bpf_fentry_test5") +int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e) { test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && e == 15; @@ -45,8 +48,8 @@ BPF_TRACE_5("fentry/bpf_fentry_test5", test5, } __u64 test6_result = 0; -BPF_TRACE_6("fentry/bpf_fentry_test6", test6, - __u64, a, void *, b, short, c, int, d, void *, e, __u64, f) +SEC("fentry/bpf_fentry_test6") +int BPF_PROG(test6, __u64 a, void *b, short c, int d, void * e, __u64 f) { test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && e == (void *)20 && f == 21; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 2d211ee98a1c..c329fccf9842 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -1,7 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#include <linux/stddef.h> +#include <linux/ipv6.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "bpf_trace_helpers.h" struct sk_buff { @@ -9,8 +12,8 @@ struct sk_buff { }; __u64 test_result = 0; -BPF_TRACE_2("fexit/test_pkt_access", test_main, - struct sk_buff *, skb, int, ret) +SEC("fexit/test_pkt_access") +int BPF_PROG(test_main, struct sk_buff *skb, int ret) { int len; @@ -24,8 +27,8 @@ BPF_TRACE_2("fexit/test_pkt_access", test_main, } __u64 test_result_subprog1 = 0; -BPF_TRACE_2("fexit/test_pkt_access_subprog1", test_subprog1, - struct sk_buff *, skb, int, ret) +SEC("fexit/test_pkt_access_subprog1") +int BPF_PROG(test_subprog1, struct sk_buff *skb, int ret) { int len; @@ -79,4 +82,73 @@ int test_subprog2(struct args_subprog2 *ctx) test_result_subprog2 = 1; return 0; } + +__u64 test_result_subprog3 = 0; +SEC("fexit/test_pkt_access_subprog3") +int BPF_PROG(test_subprog3, int val, struct sk_buff *skb, int ret) +{ + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ret != 74 * val || val != 3) + return 0; + test_result_subprog3 = 1; + return 0; +} + +__u64 test_get_skb_len = 0; +SEC("freplace/get_skb_len") +int new_get_skb_len(struct __sk_buff *skb) +{ + int len = skb->len; + + if (len != 74) + return 0; + test_get_skb_len = 1; + return 74; /* original get_skb_len() returns skb->len */ +} + +__u64 test_get_skb_ifindex = 0; +SEC("freplace/get_skb_ifindex") +int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ipv6hdr ip6, *ip6p; + int ifindex = skb->ifindex; + __u32 eth_proto; + __u32 nh_off; + + /* check that BPF extension can read packet via direct packet access */ + if (data + 14 + sizeof(ip6) > data_end) + return 0; + ip6p = data + 14; + + if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123)) + return 0; + + /* check that legacy packet access helper works too */ + if (bpf_skb_load_bytes(skb, 14, &ip6, sizeof(ip6)) < 0) + return 0; + ip6p = &ip6; + if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123)) + return 0; + + if (ifindex != 1 || val != 3 || var != 1) + return 0; + test_get_skb_ifindex = 1; + return 3; /* original get_skb_ifindex() returns val * ifindex * var */ +} + +volatile __u64 test_get_constant = 0; +SEC("freplace/get_constant") +int new_get_constant(long val) +{ + if (val != 123) + return 0; + test_get_constant = 1; + return test_get_constant; /* original get_constant() returns val - 122 */ +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c index ebc0ab7f0f5c..92f3fa47cf40 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "bpf_trace_helpers.h" struct sk_buff { @@ -9,8 +9,9 @@ struct sk_buff { }; __u64 test_result = 0; -BPF_TRACE_2("fexit/test_pkt_md_access", test_main2, - struct sk_buff *, skb, int, ret) + +SEC("fexit/test_pkt_md_access") +int BPF_PROG(test_main2, struct sk_buff *skb, int ret) { int len; diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c index 86db0d60fb6e..348109b9ea07 100644 --- a/tools/testing/selftests/bpf/progs/fexit_test.c +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -1,45 +1,47 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "bpf_trace_helpers.h" char _license[] SEC("license") = "GPL"; __u64 test1_result = 0; -BPF_TRACE_2("fexit/bpf_fentry_test1", test1, int, a, int, ret) +SEC("fexit/bpf_fentry_test1") +int BPF_PROG(test1, int a, int ret) { test1_result = a == 1 && ret == 2; return 0; } __u64 test2_result = 0; -BPF_TRACE_3("fexit/bpf_fentry_test2", test2, int, a, __u64, b, int, ret) +SEC("fexit/bpf_fentry_test2") +int BPF_PROG(test2, int a, __u64 b, int ret) { test2_result = a == 2 && b == 3 && ret == 5; return 0; } __u64 test3_result = 0; -BPF_TRACE_4("fexit/bpf_fentry_test3", test3, char, a, int, b, __u64, c, int, ret) +SEC("fexit/bpf_fentry_test3") +int BPF_PROG(test3, char a, int b, __u64 c, int ret) { test3_result = a == 4 && b == 5 && c == 6 && ret == 15; return 0; } __u64 test4_result = 0; -BPF_TRACE_5("fexit/bpf_fentry_test4", test4, - void *, a, char, b, int, c, __u64, d, int, ret) +SEC("fexit/bpf_fentry_test4") +int BPF_PROG(test4, void *a, char b, int c, __u64 d, int ret) { - test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && ret == 34; return 0; } __u64 test5_result = 0; -BPF_TRACE_6("fexit/bpf_fentry_test5", test5, - __u64, a, void *, b, short, c, int, d, __u64, e, int, ret) +SEC("fexit/bpf_fentry_test5") +int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e, int ret) { test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && e == 15 && ret == 65; @@ -47,9 +49,8 @@ BPF_TRACE_6("fexit/bpf_fentry_test5", test5, } __u64 test6_result = 0; -BPF_TRACE_7("fexit/bpf_fentry_test6", test6, - __u64, a, void *, b, short, c, int, d, void *, e, __u64, f, - int, ret) +SEC("fexit/bpf_fentry_test6") +int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret) { test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && e == (void *)20 && f == 21 && ret == 111; diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 16c54ade6888..6b42db2fe391 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -2,7 +2,7 @@ // Copyright (c) 2018 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index 974d6f3bb319..8f48a909f079 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -2,8 +2,8 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <stdbool.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "bpf_trace_helpers.h" char _license[] SEC("license") = "GPL"; @@ -57,8 +57,8 @@ struct meta { /* TRACE_EVENT(kfree_skb, * TP_PROTO(struct sk_buff *skb, void *location), */ -BPF_TRACE_2("tp_btf/kfree_skb", trace_kfree_skb, - struct sk_buff *, skb, void *, location) +SEC("tp_btf/kfree_skb") +int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location) { struct net_device *dev; struct callback_head *ptr; @@ -114,9 +114,9 @@ static volatile struct { bool fexit_test_ok; } result; -BPF_TRACE_3("fentry/eth_type_trans", fentry_eth_type_trans, - struct sk_buff *, skb, struct net_device *, dev, - unsigned short, protocol) +SEC("fentry/eth_type_trans") +int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev, + unsigned short protocol) { int len, ifindex; @@ -132,9 +132,9 @@ BPF_TRACE_3("fentry/eth_type_trans", fentry_eth_type_trans, return 0; } -BPF_TRACE_3("fexit/eth_type_trans", fexit_eth_type_trans, - struct sk_buff *, skb, struct net_device *, dev, - unsigned short, protocol) +SEC("fexit/eth_type_trans") +int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb, struct net_device *dev, + unsigned short protocol) { int len, ifindex; diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index 40ac722a9da5..50e66772c046 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -6,8 +6,8 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_tracing.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index bb80f29aa7f7..947bb7e988c2 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -6,8 +6,8 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_tracing.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 2b9165a7afe1..76e93b31c14b 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -6,8 +6,8 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_tracing.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c index 650859022771..b35337926d66 100644 --- a/tools/testing/selftests/bpf/progs/loop4.c +++ b/tools/testing/selftests/bpf/progs/loop4.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c index 28d1d668f07c..913791923fa3 100644 --- a/tools/testing/selftests/bpf/progs/loop5.c +++ b/tools/testing/selftests/bpf/progs/loop5.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define barrier() __asm__ __volatile__("": : :"memory") char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 38a997852cad..d071adf178bd 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -2,7 +2,7 @@ #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "netcnt_common.h" #define MAX_BPS (3 * 1024 * 1024) diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 71d383cc9b85..cc615b82b56e 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -6,7 +6,7 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define FUNCTION_NAME_LEN 64 #define FILE_NAME_LEN 128 @@ -154,7 +154,12 @@ struct { __uint(value_size, sizeof(long long) * 127); } stackmap SEC(".maps"); -static __always_inline int __on_event(struct pt_regs *ctx) +#ifdef GLOBAL_FUNC +__attribute__((noinline)) +#else +static __always_inline +#endif +int __on_event(struct bpf_raw_tracepoint_args *ctx) { uint64_t pid_tgid = bpf_get_current_pid_tgid(); pid_t pid = (pid_t)(pid_tgid >> 32); @@ -254,7 +259,7 @@ static __always_inline int __on_event(struct pt_regs *ctx) } SEC("raw_tracepoint/kfree_skb") -int on_event(struct pt_regs* ctx) +int on_event(struct bpf_raw_tracepoint_args* ctx) { int i, ret = 0; ret |= __on_event(ctx); diff --git a/tools/testing/selftests/bpf/progs/pyperf_global.c b/tools/testing/selftests/bpf/progs/pyperf_global.c new file mode 100644 index 000000000000..079e78a7562b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf_global.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define STACK_MAX_LEN 50 +#define GLOBAL_FUNC +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c index 0756303676ac..1612a32007b6 100644 --- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c +++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct bpf_map_def SEC("maps") htab = { .type = BPF_MAP_TYPE_HASH, diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index a91536b1c47e..092d9da536f3 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -5,8 +5,8 @@ #include <linux/bpf.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ #define SRC2_IP4 0x00000000U diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index a68062820410..255a432bc163 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -5,8 +5,8 @@ #include <linux/bpf.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC_REWRITE_IP6_0 0 #define SRC_REWRITE_IP6_1 0 diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index e4440fdd94cb..0cb5656a22b0 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -4,8 +4,8 @@ #include <linux/bpf.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct socket_cookie { __u64 cookie_key; diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index 9390e0244259..a5c6d5903b22 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -1,6 +1,6 @@ #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index e80484d98a1a..fdb4bf4408fa 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -1,7 +1,7 @@ #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 433e23918a62..4797dc985064 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -1,6 +1,6 @@ #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index dede0fcd6102..c6d428a8d785 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 4afd2595c08e..9d8c212dde9f 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <netinet/in.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 1bafbb944e37..d5a5eeb5fb52 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -3,7 +3,7 @@ #include <netinet/in.h> #include <netinet/tcp.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 4bf16e0a1b0e..ad61b722a9de 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -8,7 +8,7 @@ #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/types.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> typedef uint32_t pid_t; struct task_struct {}; diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c index 63531e1a9fa4..1f407e65ae52 100644 --- a/tools/testing/selftests/bpf/progs/tailcall1.c +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c index 21c85c477210..a093e739cf0e 100644 --- a/tools/testing/selftests/bpf/progs/tailcall2.c +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index 1ecae198b8c1..cabda877cf0a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c index 499388758119..f82075b47d7d 100644 --- a/tools/testing/selftests/bpf/progs/tailcall4.c +++ b/tools/testing/selftests/bpf/progs/tailcall4.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c index 49c64eb53f19..ce5450744fd4 100644 --- a/tools/testing/selftests/bpf/progs/tailcall5.c +++ b/tools/testing/selftests/bpf/progs/tailcall5.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 2cf813a06b83..0cb3204ddb18 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c index 4cd5e860c903..b7fc85769bdc 100644 --- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c +++ b/tools/testing/selftests/bpf/progs/test_adjust_tail.c @@ -7,7 +7,7 @@ */ #include <linux/bpf.h> #include <linux/if_ether.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 221b69700625..dd8fae6660ab 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -3,7 +3,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int kprobe_res = 0; int kretprobe_res = 0; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 62ad7e22105e..88b0566da13d 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "bpf_legacy.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index fb8d91a1dbe0..a924e53c8e9d 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "bpf_legacy.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 3f4422044759..983aedd1c072 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c index 9bfc91d9d004..3ac3603ad53d 100644 --- a/tools/testing/selftests/bpf/progs/test_core_extern.c +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -5,7 +5,7 @@ #include <stdbool.h> #include <linux/ptrace.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* non-existing BPF helper, to test dead code elimination */ static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c index 053b86f6b53f..51b3f79df523 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c index edc0f7c9e56d..56aec20212b5 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c index 6c20e433558b..ab1e647aeb31 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c index 1b7f0ae49cfb..7e45e2bdf6cd 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c index b5dbeef540fd..525acc2f841b 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c index c78ab6d28a14..6b5290739806 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index 270de441b60a..aba928fd60d3 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c index 292a5c4ee76a..d5756dbdef82 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c index 0b28bfacc8fd..8b533db4a7a5 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c index 39279bf0c9db..2b4b6d49c677 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c index ea57973cdd19..2a8975678aa6 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c index d1eb59d4ea64..ca61a5183b88 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c index 9e091124d3bd..d7fb6cfc7891 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c @@ -3,8 +3,8 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" -#include "bpf_core_read.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index 6a4a8f57f174..29817a703984 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* Permit pretty deep stack traces */ #define MAX_STACK_RAWTP 100 diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c index 32a6073acb99..dd7a4d3dbc0d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_data.c +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -5,7 +5,7 @@ #include <linux/pkt_cls.h> #include <string.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c new file mode 100644 index 000000000000..880260f6d536 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#ifndef MAX_STACK +#define MAX_STACK (512 - 3 * 32 + 8) +#endif + +static __attribute__ ((noinline)) +int f0(int var, struct __sk_buff *skb) +{ + return skb->len; +} + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + volatile char buf[MAX_STACK] = {}; + + return f0(0, skb) + skb->len; +} + +int f3(int, struct __sk_buff *skb, int); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, skb, 1); +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb, int var) +{ + volatile char buf[MAX_STACK] = {}; + + return skb->ifindex * val * var; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c new file mode 100644 index 000000000000..2c18d82923a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func2.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#define MAX_STACK (512 - 3 * 32) +#include "test_global_func1.c" diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c new file mode 100644 index 000000000000..86f0ecb304fc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func3.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + val; +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb, int var) +{ + return f2(var, skb) + val; +} + +__attribute__ ((noinline)) +int f4(struct __sk_buff *skb) +{ + return f3(1, skb, 2); +} + +__attribute__ ((noinline)) +int f5(struct __sk_buff *skb) +{ + return f4(skb); +} + +__attribute__ ((noinline)) +int f6(struct __sk_buff *skb) +{ + return f5(skb); +} + +__attribute__ ((noinline)) +int f7(struct __sk_buff *skb) +{ + return f6(skb); +} + +#ifndef NO_FN8 +__attribute__ ((noinline)) +int f8(struct __sk_buff *skb) +{ + return f7(skb); +} +#endif + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ +#ifndef NO_FN8 + return f8(skb); +#else + return f7(skb); +#endif +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c new file mode 100644 index 000000000000..610f75edf276 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func4.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#define NO_FN8 +#include "test_global_func3.c" diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c new file mode 100644 index 000000000000..260c25b827ef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +int f3(int, struct __sk_buff *skb); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, (void *)&val); /* type mismatch */ +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb) +{ + return skb->ifindex * val; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + return f1(skb) + f2(2, skb) + f3(3, skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c new file mode 100644 index 000000000000..69e19c64e10b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func6.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +int f3(int, struct __sk_buff *skb); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, skb + 1); /* type mismatch */ +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb) +{ + return skb->ifindex * val; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + return f1(skb) + f2(2, skb) + f3(3, skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c new file mode 100644 index 000000000000..309b3f6136bd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func7.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +void foo(struct __sk_buff *skb) +{ + skb->tc_index = 0; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + foo(skb); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 1d652ee8e73d..33493911d87a 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -17,9 +17,9 @@ #include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "test_iptunnel_common.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 2e4efe70b1e5..28351936a438 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -13,9 +13,9 @@ #include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "test_iptunnel_common.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c index 4147130cc3b7..7a6620671a83 100644 --- a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <linux/lirc.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("lirc_mode2") int bpf_decoder(unsigned int *sample) diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c index c957d6dfe6d7..d6cb986e7533 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c @@ -4,8 +4,8 @@ #include <linux/bpf.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct grehdr { __be16 flags; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index 41a3ebcd593d..48ff2b2ad5e7 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -3,8 +3,8 @@ #include <errno.h> #include <linux/seg6_local.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index 113226115365..1cfeb940cf9f 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -3,7 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <linux/types.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index bb7ce35f691b..b5c07ae7b68f 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -2,7 +2,7 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define VAR_NUM 16 diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c index e808791b7047..6239596cd14e 100644 --- a/tools/testing/selftests/bpf/progs/test_mmap.c +++ b/tools/testing/selftests/bpf/progs/test_mmap.c @@ -3,7 +3,7 @@ #include <linux/bpf.h> #include <stdint.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c index 3d30c02bdae9..98b9de2fafd0 100644 --- a/tools/testing/selftests/bpf/progs/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -4,7 +4,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* It is a dumb bpf program such that it must have no * issue to be loaded since testing the verifier is diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index 96c0124a04ba..bfe9fbcb9684 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -1,39 +1,45 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ +#include <stdbool.h> +#include <stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_tracing.h" +#include <linux/ptrace.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_trace_helpers.h" +struct task_struct; + SEC("kprobe/__set_task_comm") -int prog1(struct pt_regs *ctx) +int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec) { - return 0; + return !tsk; } SEC("kretprobe/__set_task_comm") -int prog2(struct pt_regs *ctx) +int BPF_KRETPROBE(prog2, + struct task_struct *tsk, const char *buf, bool exec, + int ret) { - return 0; + return !PT_REGS_PARM1(ctx) && ret; } SEC("raw_tp/task_rename") int prog3(struct bpf_raw_tracepoint_args *ctx) { - return 0; + return !ctx->args[0]; } -struct task_struct; -BPF_TRACE_3("fentry/__set_task_comm", prog4, - struct task_struct *, tsk, const char *, buf, __u8, exec) +SEC("fentry/__set_task_comm") +int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec) { - return 0; + return !tsk; } -BPF_TRACE_3("fexit/__set_task_comm", prog5, - struct task_struct *, tsk, const char *, buf, __u8, exec) +SEC("fexit/__set_task_comm") +int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) { - return 0; + return !tsk; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index 07c09ca5546a..ebfcc9f50c35 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -3,7 +3,8 @@ #include <linux/ptrace.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -12,7 +13,7 @@ struct { } perf_buf_map SEC(".maps"); SEC("kprobe/sys_nanosleep") -int handle_sys_nanosleep_entry(struct pt_regs *ctx) +int BPF_KPROBE(handle_sys_nanosleep_entry) { int cpu = bpf_get_smp_processor_id(); diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c index f20e7e00373f..4ef2630292b2 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning.c +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c index 51b38abe7ba1..5412e0c732c7 100644 --- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c +++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 3a7b4b607ed3..e72eba4a93d2 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -11,8 +11,8 @@ #include <linux/in.h> #include <linux/tcp.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define barrier() __asm__ __volatile__("": : :"memory") int _version SEC("version") = 1; @@ -47,6 +47,38 @@ int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) return skb->len * val; } +#define MAX_STACK (512 - 2 * 32) + +__attribute__ ((noinline)) +int get_skb_len(struct __sk_buff *skb) +{ + volatile char buf[MAX_STACK] = {}; + + return skb->len; +} + +__attribute__ ((noinline)) +int get_constant(long val) +{ + return val - 122; +} + +int get_skb_ifindex(int, struct __sk_buff *skb, int); + +__attribute__ ((noinline)) +int test_pkt_access_subprog3(int val, struct __sk_buff *skb) +{ + return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); +} + +__attribute__ ((noinline)) +int get_skb_ifindex(int val, struct __sk_buff *skb, int var) +{ + volatile char buf[MAX_STACK] = {}; + + return skb->ifindex * val * var; +} + SEC("classifier/test_pkt_access") int test_pkt_access(struct __sk_buff *skb) { @@ -82,6 +114,8 @@ int test_pkt_access(struct __sk_buff *skb) return TC_ACT_SHOT; if (test_pkt_access_subprog2(2, skb) != skb->len * 2) return TC_ACT_SHOT; + if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) + return TC_ACT_SHOT; if (tcp) { if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c index 1db2623021ad..610c74ea9f64 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c @@ -5,7 +5,7 @@ #include <string.h> #include <linux/bpf.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 1871e2ece0c4..d556b1572cc6 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -5,13 +5,14 @@ #include <netinet/in.h> -#include "bpf_helpers.h" -#include "bpf_tracing.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_trace_helpers.h" static struct sockaddr_in old; SEC("kprobe/__sys_connect") -int handle_sys_connect(struct pt_regs *ctx) +int BPF_KPROBE(handle_sys_connect) { void *ptr = (void *)PT_REGS_PARM2(ctx); struct sockaddr_in new; diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h index 0e014d3b2b36..4dd9806ad73b 100644 --- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h +++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h @@ -6,7 +6,7 @@ #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c index 52d94e8b214d..ecbeea2df259 100644 --- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c +++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c @@ -3,7 +3,7 @@ #include <linux/ptrace.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> static volatile const struct { unsigned a[4]; diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c index 69880c1e7700..a7278f064368 100644 --- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -3,8 +3,8 @@ #include <errno.h> #include <linux/seg6_local.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index b1f09f5bb1cf..d69a1f2bbbfd 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -11,8 +11,8 @@ #include <linux/types.h> #include <linux/if_ether.h> -#include "bpf_endian.h" -#include "bpf_helpers.h" +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> #include "test_select_reuseport_common.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 0e6be01157e6..1acc91e87bfc 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -2,46 +2,39 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __type(value, __u64); -} info_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __type(value, __u64); -} status_map SEC(".maps"); - -SEC("send_signal_demo") -int bpf_send_signal_test(void *ctx) +#include <bpf/bpf_helpers.h> + +__u32 sig = 0, pid = 0, status = 0, signal_thread = 0; + +static __always_inline int bpf_send_signal_test(void *ctx) { - __u64 *info_val, *status_val; - __u32 key = 0, pid, sig; int ret; - status_val = bpf_map_lookup_elem(&status_map, &key); - if (!status_val || *status_val != 0) - return 0; - - info_val = bpf_map_lookup_elem(&info_map, &key); - if (!info_val || *info_val == 0) + if (status != 0 || sig == 0 || pid == 0) return 0; - sig = *info_val >> 32; - pid = *info_val & 0xffffFFFF; - if ((bpf_get_current_pid_tgid() >> 32) == pid) { - ret = bpf_send_signal(sig); + if (signal_thread) + ret = bpf_send_signal_thread(sig); + else + ret = bpf_send_signal(sig); if (ret == 0) - *status_val = 1; + status = 1; } return 0; } + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int send_signal_tp(void *ctx) +{ + return bpf_send_signal_test(ctx); +} + +SEC("perf_event") +int send_signal_perf(void *ctx) +{ + return bpf_send_signal_test(ctx); +} + char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index cb49ccb707d1..d2b38fa6a5b0 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -12,8 +12,8 @@ #include <linux/pkt_cls.h> #include <linux/tcp.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index 68cf9829f5a7..552f2090665c 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -6,7 +6,7 @@ #include <string.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define NUM_CGROUP_LEVELS 4 diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index e18da87fe84f..202de3938494 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 4f69aac5635f..de03a90f78ca 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -3,7 +3,7 @@ #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct s { int a; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index a47b003623ef..9bcaa37f476a 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -5,8 +5,8 @@ #include <netinet/in.h> #include <stdbool.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> enum bpf_addr_array_idx { ADDR_SRV_IDX, diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index a43b999c8da2..0d31a3b3505f 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -2,7 +2,7 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct hmap_elem { volatile int cnt; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index f5638e26865d..0cf0134631b4 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -2,7 +2,7 @@ // Copyright (c) 2018 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef PERF_MAX_STACK_DEPTH #define PERF_MAX_STACK_DEPTH 127 diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index 3b7e1dca8829..00ed48672620 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -2,7 +2,7 @@ // Copyright (c) 2018 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef PERF_MAX_STACK_DEPTH #define PERF_MAX_STACK_DEPTH 127 diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index d22e438198cf..458b0d69133e 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index cb201cbe11e7..b2e6f9b0894d 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 5cbbff416998..2d0b0b82a78a 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ #define MAX_ULONG_STR_LEN 0xF diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c index 0961415ba477..bf28814bfde5 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_edt.c +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -7,8 +7,8 @@ #include <linux/ip.h> #include <linux/pkt_cls.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* the maximum delay we are willing to add (drop packets beyond that) */ #define TIME_HORIZON_NS (2000 * 1000 * 1000) diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 74370e7e286d..37bce7a7c394 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -17,8 +17,8 @@ #include <linux/pkt_cls.h> #include <linux/types.h> -#include "bpf_endian.h" -#include "bpf_helpers.h" +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> static const int cfg_port = 8000; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index d8803dfa8d32..47cbe2eeae43 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -13,8 +13,8 @@ #include <sys/socket.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct bpf_map_def SEC("maps") results = { .type = BPF_MAP_TYPE_ARRAY, diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index 87b7d934ce73..adc83a54c352 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -36,7 +36,7 @@ #include <linux/ipv6.h> #include <linux/version.h> #include <sys/socket.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;}) #define TCP_ESTATS_MAGIC 0xBAADBEEF diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 7fa4595d2b66..1f1966e86e9f 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -10,8 +10,8 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_tcpbpf.h" struct { diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 08346e7765d5..ac63410bb541 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -10,8 +10,8 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_tcpnotify.h" struct { diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 04bf084517e0..4b825ee122cf 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -2,7 +2,7 @@ // Copyright (c) 2017 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 504df69c83df..f48dbfe24ddc 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -19,8 +19,8 @@ #include <linux/socket.h> #include <linux/pkt_cls.h> #include <linux/erspan.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define ERROR(ret) do {\ char fmt[] = "ERROR line:%d ret:%d\n";\ diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c index f3236ce35f31..d38153dab3dd 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define ATTR __attribute__((noinline)) #include "test_jhash.h" diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index 9897150ed516..f024154c7be7 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define ATTR __always_inline #include "test_jhash.h" diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c index 1848da04ea41..9beb5bf80373 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define ATTR __attribute__((noinline)) #include "test_jhash.h" diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index 0941c655b07b..31f9bce37491 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -16,8 +16,8 @@ #include <linux/tcp.h> #include <linux/pkt_cls.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c new file mode 100644 index 000000000000..cb8a04ab7a78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" + +struct net_device { + /* Structure does not need to contain all entries, + * as "preserve_access_index" will use BTF to fix this... + */ + int ifindex; +} __attribute__((preserve_access_index)); + +struct xdp_rxq_info { + /* Structure does not need to contain all entries, + * as "preserve_access_index" will use BTF to fix this... + */ + struct net_device *dev; + __u32 queue_index; +} __attribute__((preserve_access_index)); + +struct xdp_buff { + void *data; + void *data_end; + void *data_meta; + void *data_hard_start; + unsigned long handle; + struct xdp_rxq_info *rxq; +} __attribute__((preserve_access_index)); + +__u64 test_result_fentry = 0; +SEC("fentry/_xdp_tx_iptunnel") +int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) +{ + test_result_fentry = xdp->rxq->dev->ifindex; + return 0; +} + +__u64 test_result_fexit = 0; +SEC("fexit/_xdp_tx_iptunnel") +int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret) +{ + test_result_fexit = ret; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index 97175f73c3fe..fcabcda30ba3 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -12,8 +12,8 @@ #include <linux/tcp.h> #include <linux/pkt_cls.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 8d0182650653..a7c4a7d49fe6 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -2,7 +2,7 @@ #include <linux/if_ether.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define __round_mask(x, y) ((__typeof__(x))((y) - 1)) #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index e88d7b9d65ab..8beecec166d9 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -13,8 +13,8 @@ #include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> static __u32 rol32(__u32 word, unsigned int shift) { @@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } -static __attribute__ ((noinline)) +__attribute__ ((noinline)) u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; @@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -static __attribute__ ((noinline)) +__attribute__ ((noinline)) u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index ef9e704be140..a5337cd9400b 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -10,7 +10,7 @@ * General Public License for more details. */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index 365a7d2d9f5c..134768f6b788 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -22,8 +22,8 @@ #include <linux/in.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here * diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c index 43b0ef1001ed..ea25e8881992 100644 --- a/tools/testing/selftests/bpf/progs/xdp_dummy.c +++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c @@ -2,7 +2,7 @@ #define KBUILD_MODNAME "xdp_dummy" #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("xdp_dummy") int xdp_dummy_prog(struct xdp_md *ctx) diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c index 1c5f298d7196..d037262c8937 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_DEVMAP); diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c index 57912e7c94b0..94e6c2b281cb 100644 --- a/tools/testing/selftests/bpf/progs/xdp_tx.c +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("tx") int xdp_tx(struct xdp_md *xdp) diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c index 112a2857f4e2..6b9ca40bd1f4 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -12,8 +12,8 @@ #include <linux/if_vlan.h> #include <linux/ip.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "xdping.h" diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 3d617e806054..93040ca83e60 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4148,10 +4148,6 @@ static int do_test_file(unsigned int test_num) if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) return PTR_ERR(obj); - err = bpf_object__btf_fd(obj); - if (CHECK(err == -1, "bpf_object__btf_fd: -1")) - goto done; - prog = bpf_program__next(NULL, obj); if (CHECK(!prog, "Cannot find bpf_prog")) { err = -1; diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index 6fe23a10d48a..a8d2e9a87fbf 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ #include <iostream> -#include "libbpf.h" -#include "bpf.h" -#include "btf.h" +#include <bpf/libbpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> #include "test_core_extern.skel.h" /* do nothing, just make sure we can link successfully */ diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c index b64094c981e3..c490e012c23f 100644 --- a/tools/testing/selftests/bpf/test_hashmap.c +++ b/tools/testing/selftests/bpf/test_hashmap.c @@ -8,7 +8,7 @@ #include <stdio.h> #include <errno.h> #include <linux/err.h> -#include "hashmap.h" +#include "bpf/hashmap.h" #define CHECK(condition, format...) ({ \ int __ret = !!(condition); \ diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 7fa7d08a8104..bab1e6f1d8f1 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -8,7 +8,7 @@ #include <string.h> /* defined in test_progs.h */ -struct test_env env; +struct test_env env = {}; struct prog_test_def { const char *test_name; @@ -29,10 +29,19 @@ struct prog_test_def { static bool should_run(struct test_selector *sel, int num, const char *name) { - if (sel->name && sel->name[0] && !strstr(name, sel->name)) - return false; + int i; + + for (i = 0; i < sel->blacklist.cnt; i++) { + if (strstr(name, sel->blacklist.strs[i])) + return false; + } - if (!sel->num_set) + for (i = 0; i < sel->whitelist.cnt; i++) { + if (strstr(name, sel->whitelist.strs[i])) + return true; + } + + if (!sel->whitelist.cnt && !sel->num_set) return true; return num < sel->num_set_len && sel->num_set[num]; @@ -334,6 +343,7 @@ const char argp_program_doc[] = "BPF selftests test runner"; enum ARG_KEYS { ARG_TEST_NUM = 'n', ARG_TEST_NAME = 't', + ARG_TEST_NAME_BLACKLIST = 'b', ARG_VERIFIER_STATS = 's', ARG_VERBOSE = 'v', }; @@ -341,8 +351,10 @@ enum ARG_KEYS { static const struct argp_option opts[] = { { "num", ARG_TEST_NUM, "NUM", 0, "Run test number NUM only " }, - { "name", ARG_TEST_NAME, "NAME", 0, - "Run tests with names containing NAME" }, + { "name", ARG_TEST_NAME, "NAMES", 0, + "Run tests with names containing any string from NAMES list" }, + { "name-blacklist", ARG_TEST_NAME_BLACKLIST, "NAMES", 0, + "Don't run tests with names containing any string from NAMES list" }, { "verifier-stats", ARG_VERIFIER_STATS, NULL, 0, "Output verifier statistics", }, { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, @@ -359,6 +371,41 @@ static int libbpf_print_fn(enum libbpf_print_level level, return 0; } +static int parse_str_list(const char *s, struct str_set *set) +{ + char *input, *state = NULL, *next, **tmp, **strs = NULL; + int cnt = 0; + + input = strdup(s); + if (!input) + return -ENOMEM; + + set->cnt = 0; + set->strs = NULL; + + while ((next = strtok_r(state ? NULL : input, ",", &state))) { + tmp = realloc(strs, sizeof(*strs) * (cnt + 1)); + if (!tmp) + goto err; + strs = tmp; + + strs[cnt] = strdup(next); + if (!strs[cnt]) + goto err; + + cnt++; + } + + set->cnt = cnt; + set->strs = (const char **)strs; + free(input); + return 0; +err: + free(strs); + free(input); + return -ENOMEM; +} + int parse_num_list(const char *s, struct test_selector *sel) { int i, set_len = 0, num, start = 0, end = -1; @@ -449,12 +496,24 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) if (subtest_str) { *subtest_str = '\0'; - env->subtest_selector.name = strdup(subtest_str + 1); - if (!env->subtest_selector.name) + if (parse_str_list(subtest_str + 1, + &env->subtest_selector.whitelist)) + return -ENOMEM; + } + if (parse_str_list(arg, &env->test_selector.whitelist)) + return -ENOMEM; + break; + } + case ARG_TEST_NAME_BLACKLIST: { + char *subtest_str = strchr(arg, '/'); + + if (subtest_str) { + *subtest_str = '\0'; + if (parse_str_list(subtest_str + 1, + &env->subtest_selector.blacklist)) return -ENOMEM; } - env->test_selector.name = strdup(arg); - if (!env->test_selector.name) + if (parse_str_list(arg, &env->test_selector.blacklist)) return -ENOMEM; break; } @@ -617,7 +676,11 @@ int main(int argc, char **argv) printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + free(env.test_selector.blacklist.strs); + free(env.test_selector.whitelist.strs); free(env.test_selector.num_set); + free(env.subtest_selector.blacklist.strs); + free(env.subtest_selector.whitelist.strs); free(env.subtest_selector.num_set); return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index de1fdaa4e7b4..bcfa9ef23fda 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -35,7 +35,7 @@ typedef __u16 __sum16; #include "test_iptunnel_common.h" #include "bpf_util.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> #include "trace_helpers.h" #include "flow_dissector_load.h" @@ -46,8 +46,14 @@ enum verbosity { VERBOSE_SUPER, }; +struct str_set { + const char **strs; + int cnt; +}; + struct test_selector { - const char *name; + struct str_set whitelist; + struct str_set blacklist; bool *num_set; int num_set_len; }; diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 0e6652733462..52bf14955797 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,7 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> #include "bpf_rlimit.h" #include "bpf_util.h" diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index d008b41b7d8d..9b4d3a68a91a 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -12,8 +12,8 @@ #include <linux/tcp.h> #include <linux/pkt_cls.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* Sockmap sample program connects a client and a backend together * using cgroups. diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index 40bd93a6e7ae..d196e2a4a6e0 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -13,7 +13,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> #include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index aa4dcfe18050..0383c9b8adc1 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -2,7 +2,7 @@ #ifndef __TRACE_HELPER_H #define __TRACE_HELPER_H -#include <libbpf.h> +#include <bpf/libbpf.h> struct ksym { long addr; diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index b4efb023ae51..d88d8e47d11b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -50,6 +50,8 @@ ALL_TESTS=" ipv6_mc_dip_reserved_scope_test ipv6_mc_dip_interface_local_scope_test blackhole_route_test + irif_disabled_test + erif_disabled_test " NUM_NETIFS=4 @@ -553,6 +555,116 @@ blackhole_route_test() __blackhole_route_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 "icmpv6" } +irif_disabled_test() +{ + local trap_name="irif_disabled" + local group_name="l3_drops" + local t0_packets t0_bytes + local t1_packets t1_bytes + local mz_pid + + RET=0 + + ping_check $trap_name + + devlink_trap_action_set $trap_name "trap" + + # When RIF of a physical port ("Sub-port RIF") is destroyed, we first + # block the STP of the {Port, VLAN} so packets cannot get into the RIF. + # Using bridge enables us to see this trap because when bridge is + # destroyed, there is a small time window that packets can go into the + # RIF, while it is disabled. + ip link add dev br0 type bridge + ip link set dev $rp1 master br0 + ip address flush dev $rp1 + __addr_add_del br0 add 192.0.2.2/24 + ip li set dev br0 up + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + # Generate packets to h2 through br0 RIF that will be removed later + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp1mac \ + -B $h2_ipv4 -q & + mz_pid=$! + + # Wait before removing br0 RIF to allow packets to go into the bridge. + sleep 1 + + # Flushing address will dismantle the RIF + ip address flush dev br0 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + check_err 1 "Trap stats idle when packets should be trapped" + fi + + log_test "Ingress RIF disabled" + + kill $mz_pid && wait $mz_pid &> /dev/null + ip link set dev $rp1 nomaster + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + ip link del dev br0 type bridge + devlink_trap_action_set $trap_name "drop" +} + +erif_disabled_test() +{ + local trap_name="erif_disabled" + local group_name="l3_drops" + local t0_packets t0_bytes + local t1_packets t1_bytes + local mz_pid + + RET=0 + + ping_check $trap_name + + devlink_trap_action_set $trap_name "trap" + ip link add dev br0 type bridge + ip add flush dev $rp1 + ip link set dev $rp1 master br0 + __addr_add_del br0 add 192.0.2.2/24 + ip link set dev br0 up + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + rp2mac=$(mac_get $rp2) + + # Generate packets that should go out through br0 RIF that will be + # removed later + $MZ $h2 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp2mac \ + -B 192.0.2.1 -q & + mz_pid=$! + + sleep 5 + # In order to see this trap we need a route that points to disabled RIF. + # When ipv6 address is flushed, there is a delay and the routes are + # deleted before the RIF and we cannot get state that we have route + # to disabled RIF. + # Delete IPv6 address first and then check this trap with flushing IPv4. + ip -6 add flush dev br0 + ip -4 add flush dev br0 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + check_err 1 "Trap stats idle when packets should be trapped" + fi + + log_test "Egress RIF disabled" + + kill $mz_pid && wait $mz_pid &> /dev/null + ip link set dev $rp1 nomaster + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + ip link del dev br0 type bridge + devlink_trap_action_set $trap_name "drop" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh new file mode 100755 index 000000000000..039629bb92a3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -0,0 +1,265 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 192.0.2.2/28 | +# | | +# | + g1a (gre) | +# | loc=192.0.2.65 | +# | rem=192.0.2.66 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 198.51.100.1/28 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 198.51.100.2/28 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +vrf2_create() +{ + simple_if_init $rp2 198.51.100.2/28 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 198.51.100.2/28 +} + +switch_create() +{ + __addr_add_del $swp1 add 192.0.2.2/28 + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 up + + tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit + __addr_add_del g1 add 192.0.2.65/32 + ip link set dev g1 up + + __addr_add_del $rp1 add 198.51.100.1/28 + ip link set dev $rp1 up +} + +switch_destroy() +{ + ip link set dev $rp1 down + __addr_add_del $rp1 del 198.51.100.1/28 + + ip link set dev g1 down + __addr_add_del g1 del 192.0.2.65/32 + tunnel_destroy g1 + + ip link set dev $swp1 down + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 192.0.2.2/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ecn_payload_get() +{ + p=$(: + )"0"$( : GRE flags + )"0:00:"$( : Reserved + version + )"08:00:"$( : ETH protocol type + )"4"$( : IP version + )"5:"$( : IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"E7:E6:"$( : IP header csum + )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 + )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 192.0.2.66 -B 192.0.2.65 -t ip \ + len=48,tos=$outer_tos,proto=47,p=$payload -q & + + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +ipip_payload_get() +{ + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"08:00:"$( : ETH protocol type + )"$key"$( : Key + )"4"$( : IP version + )"5:"$( : IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"E7:E6:"$( : IP header csum + )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 + )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 + ) + echo $p +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=192.0.2.66 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 192.0.2.68 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" ":E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 192.0.2.65/32 + tunnel_destroy g1 + + tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit key 233 + __addr_add_del g1 add 192.0.2.65/32 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" "E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh new file mode 100755 index 000000000000..fd19161dd4ec --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel drops and exceptions functionality over mlxsw. +# Check all traps to make sure they are triggered under the right +# conditions. + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRF2 | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test + overlay_smac_is_mc_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link add name vx1 type vxlan id 1000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 master br1 + ip link set dev vx1 up + + ip address add dev $rp1 192.0.2.17/28 + ip link set dev $rp1 up +} + +switch_destroy() +{ + ip link set dev $rp1 down + ip address del dev $rp1 192.0.2.17/28 + + ip link set dev vx1 down + ip link set dev vx1 nomaster + ip link del dev vx1 + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link del dev br1 +} + +vrf2_create() +{ + simple_if_init $rp2 192.0.2.18/28 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + forwarding_restore + vrf_cleanup +} + +ecn_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"D6:E5:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac -B 192.0.2.17 \ + -t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +reserved_bits_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +short_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + ) + echo $p +} + +corrupted_packet_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local payload_get=$1; shift + local mz_pid + + RET=0 + + # In case of too short packet, there is no any inner packet, + # so the matching will always succeed + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_hw src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$($payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +decap_error_test() +{ + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + corrupted_packet_test "Decap error: Reserved bits in use" \ + "reserved_bits_payload_get" + corrupted_packet_test "Decap error: No L2 header" "short_payload_get" +} + +mc_smac_payload_get() +{ + dest_mac=$(mac_get $h1) + source_mac=01:02:03:04:05:06 + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +overlay_smac_is_mc_test() +{ + local trap_name="overlay_smac_is_mc" + local group_name="tunnel_drops" + local mz_pid + + RET=0 + + # The matching will be checked on devlink_trap_drop_test() + # and the filter will be removed on devlink_trap_drop_cleanup() + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_mac 01:02:03:04:05:06 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(mc_smac_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp1 + + log_test "Overlay source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp1 "ip" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh new file mode 100755 index 000000000000..45115f81c2b1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the FIB offload API on top of mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_identical_routes + ipv4_tos + ipv4_metric + ipv4_replace + ipv4_delete + ipv4_plen + ipv4_replay + ipv4_flush + ipv6_add + ipv6_metric + ipv6_append_single + ipv6_replace_single + ipv6_metric_multipath + ipv6_append_multipath + ipv6_replace_multipath + ipv6_append_multipath_to_single + ipv6_delete_single + ipv6_delete_multipath + ipv6_replay_single + ipv6_replay_multipath +" +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source $lib_dir/fib_offload_lib.sh + +ipv4_identical_routes() +{ + fib_ipv4_identical_routes_test "testns1" +} + +ipv4_tos() +{ + fib_ipv4_tos_test "testns1" +} + +ipv4_metric() +{ + fib_ipv4_metric_test "testns1" +} + +ipv4_replace() +{ + fib_ipv4_replace_test "testns1" +} + +ipv4_delete() +{ + fib_ipv4_delete_test "testns1" +} + +ipv4_plen() +{ + fib_ipv4_plen_test "testns1" +} + +ipv4_replay_metric() +{ + fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_tos() +{ + fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_plen() +{ + fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay() +{ + ipv4_replay_metric + ipv4_replay_tos + ipv4_replay_plen +} + +ipv4_flush() +{ + fib_ipv4_flush_test "testns1" +} + +ipv6_add() +{ + fib_ipv6_add_test "testns1" +} + +ipv6_metric() +{ + fib_ipv6_metric_test "testns1" +} + +ipv6_append_single() +{ + fib_ipv6_append_single_test "testns1" +} + +ipv6_replace_single() +{ + fib_ipv6_replace_single_test "testns1" +} + +ipv6_metric_multipath() +{ + fib_ipv6_metric_multipath_test "testns1" +} + +ipv6_append_multipath() +{ + fib_ipv6_append_multipath_test "testns1" +} + +ipv6_replace_multipath() +{ + fib_ipv6_replace_multipath_test "testns1" +} + +ipv6_append_multipath_to_single() +{ + fib_ipv6_append_multipath_to_single_test "testns1" +} + +ipv6_delete_single() +{ + fib_ipv6_delete_single_test "testns1" +} + +ipv6_delete_multipath() +{ + fib_ipv6_delete_multipath_test "testns1" +} + +ipv6_replay_single() +{ + fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV" +} + +ipv6_replay_multipath() +{ + fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV" +} + +setup_prepare() +{ + ip netns add testns1 + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi +} + +cleanup() +{ + pre_cleanup + devlink -N testns1 dev reload $DEVLINK_DEV netns $$ + ip netns del testns1 +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index a5937069ac16..faa51012cdac 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -1,29 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -humanize() -{ - local speed=$1; shift - - for unit in bps Kbps Mbps Gbps; do - if (($(echo "$speed < 1024" | bc))); then - break - fi - - speed=$(echo "scale=1; $speed / 1024" | bc) - done - - echo "$speed${unit}" -} - -rate() -{ - local t0=$1; shift - local t1=$1; shift - local interval=$1; shift - - echo $((8 * (t1 - t0) / interval)) -} - check_rate() { local rate=$1; shift diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 47315fe48d5a..24dd8ed48580 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -232,7 +232,7 @@ test_mc_aware() stop_traffic local ucth1=${uc_rate[1]} - start_traffic $h1 own bc bc + start_traffic $h1 192.0.2.65 bc bc local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) @@ -254,7 +254,11 @@ test_mc_aware() ret = 100 * ($ucth1 - $ucth2) / $ucth1 if (ret > 0) { ret } else { 0 } ") - check_err $(bc <<< "$deg > 25") + + # Minimum shaper of 200Mbps on MC TCs should cause about 20% of + # degradation on 1Gbps link. + check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect" + check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much" local interval=$((d1 - d0)) local mc_ir=$(rate $u0 $u1 $interval) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh new file mode 100755 index 000000000000..c6ce0b448bf3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh new file mode 100755 index 000000000000..8d245f331619 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_prio.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh new file mode 100755 index 000000000000..013886061f15 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_root.sh diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh new file mode 100755 index 000000000000..2f87c3be76a9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the FIB offload API. It makes use of netdevsim +# which registers a listener to the FIB notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_identical_routes + ipv4_tos + ipv4_metric + ipv4_replace + ipv4_delete + ipv4_plen + ipv4_replay + ipv4_flush + ipv4_error_path + ipv6_add + ipv6_metric + ipv6_append_single + ipv6_replace_single + ipv6_metric_multipath + ipv6_append_multipath + ipv6_replace_multipath + ipv6_append_multipath_to_single + ipv6_delete_single + ipv6_delete_multipath + ipv6_replay_single + ipv6_replay_multipath + ipv6_error_path +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source $lib_dir/fib_offload_lib.sh + +ipv4_identical_routes() +{ + fib_ipv4_identical_routes_test "testns1" +} + +ipv4_tos() +{ + fib_ipv4_tos_test "testns1" +} + +ipv4_metric() +{ + fib_ipv4_metric_test "testns1" +} + +ipv4_replace() +{ + fib_ipv4_replace_test "testns1" +} + +ipv4_delete() +{ + fib_ipv4_delete_test "testns1" +} + +ipv4_plen() +{ + fib_ipv4_plen_test "testns1" +} + +ipv4_replay_metric() +{ + fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_tos() +{ + fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_plen() +{ + fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay() +{ + ipv4_replay_metric + ipv4_replay_tos + ipv4_replay_plen +} + +ipv4_flush() +{ + fib_ipv4_flush_test "testns1" +} + +ipv4_error_path_add() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 \ + &> /dev/null + done + + log_test "IPv4 error path - add" + + ip -n testns1 link del dev dummy1 +} + +ipv4_error_path_replay() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null + + log_test "IPv4 error path - replay" + + ip -n testns1 link del dev dummy1 + + # Successfully reload after deleting all the routes. + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV +} + +ipv4_error_path() +{ + # Test the different error paths of the notifiers by limiting the size + # of the "IPv4/fib" resource. + ipv4_error_path_add + ipv4_error_path_replay +} + +ipv6_add() +{ + fib_ipv6_add_test "testns1" +} + +ipv6_metric() +{ + fib_ipv6_metric_test "testns1" +} + +ipv6_append_single() +{ + fib_ipv6_append_single_test "testns1" +} + +ipv6_replace_single() +{ + fib_ipv6_replace_single_test "testns1" +} + +ipv6_metric_multipath() +{ + fib_ipv6_metric_multipath_test "testns1" +} + +ipv6_append_multipath() +{ + fib_ipv6_append_multipath_test "testns1" +} + +ipv6_replace_multipath() +{ + fib_ipv6_replace_multipath_test "testns1" +} + +ipv6_append_multipath_to_single() +{ + fib_ipv6_append_multipath_to_single_test "testns1" +} + +ipv6_delete_single() +{ + fib_ipv6_delete_single_test "testns1" +} + +ipv6_delete_multipath() +{ + fib_ipv6_delete_multipath_test "testns1" +} + +ipv6_replay_single() +{ + fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV" +} + +ipv6_replay_multipath() +{ + fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV" +} + +ipv6_error_path_add_single() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 \ + &> /dev/null + done + + log_test "IPv6 error path - add single" + + ip -n testns1 link del dev dummy1 +} + +ipv6_error_path_add_multipath() +{ + local lsb + + RET=0 + + for i in $(seq 1 2); do + ip -n testns1 link add name dummy$i type dummy + ip -n testns1 link set dev dummy$i up + ip -n testns1 address add 2001:db8:$i::1/64 dev dummy$i + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:10::${lsb}/128 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 &> /dev/null + done + + log_test "IPv6 error path - add multipath" + + for i in $(seq 1 2); do + ip -n testns1 link del dev dummy$i + done +} + +ipv6_error_path_replay() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null + + log_test "IPv6 error path - replay" + + ip -n testns1 link del dev dummy1 + + # Successfully reload after deleting all the routes. + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV +} + +ipv6_error_path() +{ + # Test the different error paths of the notifiers by limiting the size + # of the "IPv6/fib" resource. + ipv6_error_path_add_single + ipv6_error_path_add_multipath + ipv6_error_path_replay +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + ip netns add testns1 + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh new file mode 100644 index 000000000000..66496659bea7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh @@ -0,0 +1,873 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Various helpers and tests to verify FIB offload. + +__fib_trap_check() +{ + local ns=$1; shift + local family=$1; shift + local route=$1; shift + local should_fail=$1; shift + local ret + + ip -n $ns -j -p -$family route show $route \ + | jq -e '.[]["flags"] | contains(["trap"])' &> /dev/null + ret=$? + if [[ $should_fail == "true" ]]; then + if [[ $ret -ne 0 ]]; then + return 0 + else + return 1 + fi + fi + + return $ret +} + +fib_trap_check() +{ + local ns=$1; shift + local family=$1; shift + local route=$1; shift + local should_fail=$1; shift + + busywait 5000 __fib_trap_check $ns $family "$route" $should_fail +} + +fib4_trap_check() +{ + local ns=$1; shift + local route=$1; shift + local should_fail=$1; shift + + fib_trap_check $ns 4 "$route" $should_fail +} + +fib6_trap_check() +{ + local ns=$1; shift + local route=$1; shift + local should_fail=$1; shift + + fib_trap_check $ns 6 "$route" $should_fail +} + +fib_ipv4_identical_routes_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 3); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 192.0.2.0/24 dev dummy2 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 tos 0 metric 1024" true + check_err $? "Appended route in hardware when should not" + + ip -n $ns route prepend 192.0.2.0/24 dev dummy3 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy3 tos 0 metric 1024" false + check_err $? "Prepended route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true + check_err $? "Route was not replaced in hardware by prepended one" + + log_test "IPv4 identical routes" + + for i in $(seq 1 3); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv4_tos_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false + check_err $? "Highest TOS route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true + check_err $? "Lowest TOS route still in hardware when should not" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true + check_err $? "Middle TOS route in hardware when should not" + + log_test "IPv4 routes with TOS" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_metric_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1022 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1022" false + check_err $? "Lowest metric route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" true + check_err $? "Highest metric route still in hardware when should not" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1023 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1023" true + check_err $? "Middle metric route in hardware when should not" + + log_test "IPv4 routes with metric" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replace_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route replace 192.0.2.0/24 dev dummy2 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1024" false + check_err $? "Replacement route not in hardware when should" + + # Add a route with an higher metric and make sure that replacing it + # does not affect the lower metric one. + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1025 + ip -n $ns route replace 192.0.2.0/24 dev dummy2 metric 1025 + + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv4 route replace" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv4_delete_test() +{ + local ns=$1; shift + local metric + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + # Insert multiple routes with the same prefix and length and varying + # metrics. Make sure that throughout delete operations the lowest + # metric route is the one in hardware. + for metric in $(seq 1024 1026); do + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric $metric + done + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route del 192.0.2.0/24 dev dummy1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" false + check_err $? "Lowest metric route not in hardware when should" + + ip -n $ns route del 192.0.2.0/24 dev dummy1 metric 1026 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" false + check_err $? "Sole route not in hardware when should" + + log_test "IPv4 route delete" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_plen_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + # Add two routes with the same key and different prefix length and + # make sure both are in hardware. It can be verfied that both are + # sharing the same leaf by checking the /proc/net/fib_trie + ip -n $ns route add 192.0.2.0/24 dev dummy1 + ip -n $ns route add 192.0.2.0/25 dev dummy1 + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1" false + check_err $? "/24 not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/25 dev dummy1" false + check_err $? "/25 not in hardware when should" + + log_test "IPv4 routes with different prefix length" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replay_metric_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024 + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1025 + + devlink -N $ns dev reload $devlink_dev + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv4 routes replay - metric" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replay_tos_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 + + devlink -N $ns dev reload $devlink_dev + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false + check_err $? "Highest TOS route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true + check_err $? "Lowest TOS route in hardware when should not" + + log_test "IPv4 routes replay - TOS" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replay_plen_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 + ip -n $ns route add 192.0.2.0/25 dev dummy1 + + devlink -N $ns dev reload $devlink_dev + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1" false + check_err $? "/24 not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/25 dev dummy1" false + check_err $? "/25 not in hardware when should" + + log_test "IPv4 routes replay - prefix length" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_flush_test() +{ + local ns=$1; shift + local metric + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + # Exercise the routes flushing code paths by inserting various + # prefix routes on a netdev and then deleting it. + for metric in $(seq 1 20); do + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric $metric + done + + ip -n $ns link del dev dummy1 + + log_test "IPv4 routes flushing" +} + +fib_ipv6_add_test() +{ + local ns=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:1::/64 dev dummy2 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" true + check_err $? "Route in hardware when should not" + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware after appending route" + + log_test "IPv6 single route add" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_metric_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1022 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1022" false + check_err $? "Lowest metric route not in hardware when should" + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" true + check_err $? "Highest metric route still in hardware when should not" + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1023 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1023" true + check_err $? "Middle metric route in hardware when should not" + + log_test "IPv6 routes with metric" + + ip -n $ns link del dev dummy1 +} + +fib_ipv6_append_single_test() +{ + local ns=$1; shift + + # When an IPv6 multipath route is added without the 'nexthop' keyword, + # different code paths are taken compared to when the keyword is used. + # This test tries to verify the former. + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 via 2001:db8:1::2 metric 1024 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:10::/64 via 2001:db8:2::2 metric 1024 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware after appending" + + ip -n $ns route add 2001:db8:10::/64 via 2001:db8:1::2 metric 1025 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not" + + ip -n $ns route append 2001:db8:10::/64 via 2001:db8:2::2 metric 1025 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not after appending" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + + log_test "IPv6 append single route without 'nexthop' keyword" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replace_single_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route replace 2001:db8:1::/64 dev dummy2 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" false + check_err $? "Replacement route not in hardware when should" + + # Add a route with an higher metric and make sure that replacing it + # does not affect the lower metric one. + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1025 + ip -n $ns route replace 2001:db8:1::/64 dev dummy2 metric 1025 + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv6 single route replace" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_metric_multipath_test() +{ + local ns=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 2001:db8:10::/64 metric 1022 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1022" false + check_err $? "Lowest metric route not in hardware when should" + + ip -n $ns route add 2001:db8:10::/64 metric 1023 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" true + check_err $? "Highest metric route still in hardware when should not" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1023" true + check_err $? "Middle metric route in hardware when should not" + + log_test "IPv6 multipath routes with metric" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_append_multipath_test() +{ + local ns=$1; shift + + RET=0 + + for i in $(seq 1 3); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:2::2 dev dummy2 \ + nexthop via 2001:db8:3::2 dev dummy3 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware after appending" + + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not" + + ip -n $ns route append 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:2::2 dev dummy2 \ + nexthop via 2001:db8:3::2 dev dummy3 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not after appending" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + + log_test "IPv6 append multipath route with 'nexthop' keyword" + + for i in $(seq 1 3); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replace_multipath_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 3); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route replace 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:3::2 dev dummy3 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Replacement route not in hardware when should" + + # Add a route with an higher metric and make sure that replacing it + # does not affect the lower metric one. + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route replace 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:3::2 dev dummy3 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv6 multipath route replace" + + for i in $(seq 1 3); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_append_multipath_to_single_test() +{ + local ns=$1; shift + + # Test that when the first route in the leaf is not a multipath route + # and we try to append a multipath route with the same metric to it, it + # is not notified. + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 dev dummy2 metric 1024" true + check_err $? "Route in hardware when should not" + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware after append" + + log_test "IPv6 append multipath route to non-multipath route" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_delete_single_test() +{ + local ns=$1; shift + + # Test various deletion scenarios, where only a single route is + # deleted from the FIB node. + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + # Test deletion of a single route when it is the only route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + log_test "IPv6 delete sole single route" + + # Test that deletion of last route does not affect the first one. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025 + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware after deleting higher metric route" + + log_test "IPv6 delete single route not in hardware" + + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + # Test that first route is replaced by next single route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete single route - replaced by single" + + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025 + + # Test that first route is replaced by next multipath route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete single route - replaced by multipath" + + ip -n $ns route del 2001:db8:10::/64 metric 1025 + + # Test deletion of a single nexthop from a multipath route. + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware after deleting a single nexthop" + + log_test "IPv6 delete single nexthop" + + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_delete_multipath_test() +{ + local ns=$1; shift + + # Test various deletion scenarios, where an entire multipath route is + # deleted from the FIB node. + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + # Test deletion of a multipath route when it is the only route in the + # FIB node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + log_test "IPv6 delete sole multipath route" + + # Test that deletion of last route does not affect the first one. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1025 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware after deleting higher metric route" + + log_test "IPv6 delete multipath route not in hardware" + + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + # Test that first route is replaced by next single route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025 + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete multipath route - replaced by single" + + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025 + + # Test that first route is replaced by next multipath route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete multipath route - replaced by multipath" + + ip -n $ns route del 2001:db8:10::/64 metric 1025 + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replay_single_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 + ip -n $ns route append 2001:db8:1::/64 dev dummy2 + + devlink -N $ns dev reload $devlink_dev + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1" false + check_err $? "First route not in hardware when should" + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2" true + check_err $? "Second route in hardware when should not" + + log_test "IPv6 routes replay - single route" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replay_multipath_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + + devlink -N $ns dev reload $devlink_dev + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "First route not in hardware when should" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Second route in hardware when should not" + + log_test "IPv6 routes replay - multipath route" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8dc5fac98cbc..2f5da414aaa7 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -248,6 +248,24 @@ busywait() done } +until_counter_is() +{ + local value=$1; shift + local current=$("$@") + + echo $((current)) + ((current >= value)) +} + +busywait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + busywait "$timeout" until_counter_is $((base + delta)) "$@" +} + setup_wait_dev() { local dev=$1; shift @@ -575,9 +593,10 @@ tc_rule_stats_get() local dev=$1; shift local pref=$1; shift local dir=$1; shift + local selector=${1:-.packets}; shift tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ - | jq '.[1].options.actions[].stats.packets' + | jq ".[1].options.actions[].stats$selector" } ethtool_stats_get() @@ -588,6 +607,30 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + mac_get() { local if_name=$1 diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh new file mode 100644 index 000000000000..d1f26cb7cd73 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -0,0 +1,233 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends a stream of traffic from H1 through a switch, to H2. On the +# egress port from the switch ($swp2), a shaper is installed. The test verifies +# that the rates on the port match the configured shaper. +# +# In order to test per-class shaping, $swp2 actually contains TBF under PRIO or +# ETS, with two different configurations. Traffic is prioritized using 802.1p. +# +# +-------------------------------------------+ +# | H1 | +# | + $h1.10 $h1.11 + | +# | | 192.0.2.1/28 192.0.2.17/28 | | +# | | | | +# | \______________ _____________/ | +# | \ / | +# | + $h1 | +# +---------------------|---------------------+ +# | +# +---------------------|---------------------+ +# | SW + $swp1 | +# | _______________/ \_______________ | +# | / \ | +# | +-|--------------+ +--------------|-+ | +# | | + $swp1.10 | | $swp1.11 + | | +# | | | | | | +# | | BR10 | | BR11 | | +# | | | | | | +# | | + $swp2.10 | | $swp2.11 + | | +# | +-|--------------+ +--------------|-+ | +# | \_______________ ______________/ | +# | \ / | +# | + $swp2 | +# +---------------------|---------------------+ +# | +# +---------------------|---------------------+ +# | H2 + $h2 | +# | ______________/ \______________ | +# | / \ | +# | | | | +# | + $h2.10 $h2.11 + | +# | 192.0.2.2/28 192.0.2.18/28 | +# +-------------------------------------------+ + +NUM_NETIFS=4 +CHECK_TC="yes" +source $lib_dir/lib.sh + +ipaddr() +{ + local host=$1; shift + local vlan=$1; shift + + echo 192.0.2.$((16 * (vlan - 10) + host)) +} + +host_create() +{ + local dev=$1; shift + local host=$1; shift + + simple_if_init $dev + mtu_set $dev 10000 + + vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + ip link set dev $dev.10 type vlan egress 0:0 + + vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + ip link set dev $dev.11 type vlan egress 0:1 +} + +host_destroy() +{ + local dev=$1; shift + + vlan_destroy $dev 11 + vlan_destroy $dev 10 + mtu_restore $dev + simple_if_fini $dev +} + +h1_create() +{ + host_create $h1 1 +} + +h1_destroy() +{ + host_destroy $h1 +} + +h2_create() +{ + host_create $h2 2 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 prot 802.1q \ + flower $TCFLAGS vlan_id 10 action pass + tc filter add dev $h2 ingress pref 1011 prot 802.1q \ + flower $TCFLAGS vlan_id 11 action pass +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + host_destroy $h2 +} + +switch_create() +{ + local intf + local vlan + + ip link add dev br10 type bridge + ip link add dev br11 type bridge + + for intf in $swp1 $swp2; do + ip link set dev $intf up + mtu_set $intf 10000 + + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br$vlan + ip link set dev $intf.$vlan up + done + done + + for vlan in 10 11; do + ip link set dev $swp1.$vlan type vlan ingress 0:0 1:1 + done + + ip link set dev br10 up + ip link set dev br11 up +} + +switch_destroy() +{ + local intf + local vlan + + # A test may have been interrupted mid-run, with Qdisc installed. Delete + # it here. + tc qdisc del dev $swp2 root 2>/dev/null + + ip link set dev br11 down + ip link set dev br10 down + + for intf in $swp2 $swp1; do + for vlan in 11 10; do + ip link set dev $intf.$vlan down + ip link set dev $intf.$vlan nomaster + vlan_destroy $intf $vlan + done + + mtu_restore $intf + ip link set dev $intf down + done + + ip link del dev br11 + ip link del dev br10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + swp4=${NETIFS[p7]} + swp5=${NETIFS[p8]} + + h2_mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(ipaddr 2 10) " vlan 10" + ping_test $h1.11 $(ipaddr 2 11) " vlan 11" +} + +tbf_get_counter() +{ + local vlan=$1; shift + + tc_rule_stats_get $h2 10$vlan ingress .bytes +} + +do_tbf_test() +{ + local vlan=$1; shift + local mbit=$1; shift + + start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac + sleep 5 # Wait for the burst to dwindle + + local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) + sleep 10 + local t3=$(tbf_get_counter $vlan) + stop_traffic + + RET=0 + + # Note: TBF uses 10^6 Mbits, not 2^20 ones. + local er=$((mbit * 1000 * 1000)) + local nr=$(rate $t2 $t3 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-5 <= nr_pct && nr_pct <= 5)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." + + log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh new file mode 100755 index 000000000000..84fb6cab88e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC="ets strict" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_etsprio.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh new file mode 100644 index 000000000000..8bd85da1905a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + tbf_test +" +source $lib_dir/sch_tbf_core.sh + +tbf_test_one() +{ + local bs=$1; shift + + tc qdisc replace dev $swp2 parent 10:3 handle 103: tbf \ + rate 400Mbit burst $bs limit 1M + tc qdisc replace dev $swp2 parent 10:2 handle 102: tbf \ + rate 800Mbit burst $bs limit 1M + + do_tbf_test 10 400 $bs + do_tbf_test 11 800 $bs +} + +tbf_test() +{ + # This test is used for both ETS and PRIO. Even though we only need two + # bands, PRIO demands a minimum of three. + tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + tbf_test_one 128K + tc qdisc del dev $swp2 root +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh new file mode 100755 index 000000000000..9c8cb1cb9ba4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC="prio bands" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_etsprio.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh new file mode 100755 index 000000000000..72aa21ba88c7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + tbf_test +" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_core.sh + +tbf_test_one() +{ + local bs=$1; shift + + tc qdisc replace dev $swp2 root handle 108: tbf \ + rate 400Mbit burst $bs limit 1M + do_tbf_test 10 400 $bs +} + +tbf_test() +{ + tbf_test_one 128K + tc qdisc del dev $swp2 root +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore new file mode 100644 index 000000000000..d72f07642738 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -0,0 +1,2 @@ +mptcp_connect +*.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile new file mode 100644 index 000000000000..93de52016dde --- /dev/null +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g + +TEST_PROGS := mptcp_connect.sh + +TEST_GEN_FILES = mptcp_connect + +EXTRA_CLEAN := *.pcap + +include ../../lib.mk diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config new file mode 100644 index 000000000000..2499824d9e1c --- /dev/null +++ b/tools/testing/selftests/net/mptcp/config @@ -0,0 +1,4 @@ +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y +CONFIG_VETH=y +CONFIG_NET_SCH_NETEM=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c new file mode 100644 index 000000000000..a3dccd816ae4 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -0,0 +1,832 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <errno.h> +#include <limits.h> +#include <fcntl.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> + +#include <sys/poll.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/mman.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> + +extern int optind; + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef TCP_ULP +#define TCP_ULP 31 +#endif + +static bool listen_mode; +static int poll_timeout; + +enum cfg_mode { + CFG_MODE_POLL, + CFG_MODE_MMAP, + CFG_MODE_SENDFILE, +}; + +static enum cfg_mode cfg_mode = CFG_MODE_POLL; +static const char *cfg_host; +static const char *cfg_port = "12000"; +static int cfg_sock_proto = IPPROTO_MPTCP; +static bool tcpulp_audit; +static int pf = AF_INET; +static int cfg_sndbuf; + +static void die_usage(void) +{ + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]" + "[ -l ] [ -t timeout ] connect_address\n"); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, + char *host, socklen_t hostlen, + char *serv, socklen_t servlen) +{ + int flags = NI_NUMERICHOST | NI_NUMERICSERV; + int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen, + flags); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr); + exit(1); + } +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? service : "", errstr); + exit(1); + } +} + +static void set_sndbuf(int fd, unsigned int size) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)); + if (err) { + perror("set SO_SNDBUF"); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) { + fprintf(stderr, "Could not create listen socket\n"); + return sock; + } + + if (listen(sock, 20)) { + perror("listen"); + close(sock); + return -1; + } + + return sock; +} + +static bool sock_test_tcpulp(const char * const remoteaddr, + const char * const port) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1, ret = 0; + bool test_pass = false; + + hints.ai_family = AF_INET; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP); + if (sock < 0) { + perror("socket"); + continue; + } + ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp", + sizeof("mptcp")); + if (ret == -1 && errno == EOPNOTSUPP) + test_pass = true; + close(sock); + + if (test_pass) + break; + if (!ret) + fprintf(stderr, + "setsockopt(TCP_ULP) returned 0\n"); + else + perror("setsockopt(TCP_ULP)"); + } + return test_pass; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) { + perror("socket"); + continue; + } + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("connect()"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + return sock; +} + +static size_t do_rnd_write(const int fd, char *buf, const size_t len) +{ + unsigned int do_w; + ssize_t bw; + + do_w = rand() & 0xffff; + if (do_w == 0 || do_w > len) + do_w = len; + + bw = write(fd, buf, do_w); + if (bw < 0) + perror("write"); + + return bw; +} + +static size_t do_write(const int fd, char *buf, const size_t len) +{ + size_t offset = 0; + + while (offset < len) { + size_t written; + ssize_t bw; + + bw = write(fd, buf + offset, len - offset); + if (bw < 0) { + perror("write"); + return 0; + } + + written = (size_t)bw; + offset += written; + } + + return offset; +} + +static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) +{ + size_t cap = rand(); + + cap &= 0xffff; + + if (cap == 0) + cap = 1; + else if (cap > len) + cap = len; + + return read(fd, buf, cap); +} + +static void set_nonblock(int fd) +{ + int flags = fcntl(fd, F_GETFL); + + if (flags == -1) + return; + + fcntl(fd, F_SETFL, flags | O_NONBLOCK); +} + +static int copyfd_io_poll(int infd, int peerfd, int outfd) +{ + struct pollfd fds = { + .fd = peerfd, + .events = POLLIN | POLLOUT, + }; + unsigned int woff = 0, wlen = 0; + char wbuf[8192]; + + set_nonblock(peerfd); + + for (;;) { + char rbuf[8192]; + ssize_t len; + + if (fds.events == 0) + break; + + switch (poll(&fds, 1, poll_timeout)) { + case -1: + if (errno == EINTR) + continue; + perror("poll"); + return 1; + case 0: + fprintf(stderr, "%s: poll timed out (events: " + "POLLIN %u, POLLOUT %u)\n", __func__, + fds.events & POLLIN, fds.events & POLLOUT); + return 2; + } + + if (fds.revents & POLLIN) { + len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + if (len == 0) { + /* no more data to receive: + * peer has closed its write side + */ + fds.events &= ~POLLIN; + + if ((fds.events & POLLOUT) == 0) + /* and nothing more to send */ + break; + + /* Else, still have data to transmit */ + } else if (len < 0) { + perror("read"); + return 3; + } + + do_write(outfd, rbuf, len); + } + + if (fds.revents & POLLOUT) { + if (wlen == 0) { + woff = 0; + wlen = read(infd, wbuf, sizeof(wbuf)); + } + + if (wlen > 0) { + ssize_t bw; + + bw = do_rnd_write(peerfd, wbuf + woff, wlen); + if (bw < 0) + return 111; + + woff += bw; + wlen -= bw; + } else if (wlen == 0) { + /* We have no more data to send. */ + fds.events &= ~POLLOUT; + + if ((fds.events & POLLIN) == 0) + /* ... and peer also closed already */ + break; + + /* ... but we still receive. + * Close our write side. + */ + shutdown(peerfd, SHUT_WR); + } else { + if (errno == EINTR) + continue; + perror("read"); + return 4; + } + } + + if (fds.revents & (POLLERR | POLLNVAL)) { + fprintf(stderr, "Unexpected revents: " + "POLLERR/POLLNVAL(%x)\n", fds.revents); + return 5; + } + } + + close(peerfd); + return 0; +} + +static int do_recvfile(int infd, int outfd) +{ + ssize_t r; + + do { + char buf[16384]; + + r = do_rnd_read(infd, buf, sizeof(buf)); + if (r > 0) { + if (write(outfd, buf, r) != r) + break; + } else if (r < 0) { + perror("read"); + } + } while (r > 0); + + return (int)r; +} + +static int do_mmap(int infd, int outfd, unsigned int size) +{ + char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); + ssize_t ret = 0, off = 0; + size_t rem; + + if (inbuf == MAP_FAILED) { + perror("mmap"); + return 1; + } + + rem = size; + + while (rem > 0) { + ret = write(outfd, inbuf + off, rem); + + if (ret < 0) { + perror("write"); + break; + } + + off += ret; + rem -= ret; + } + + munmap(inbuf, size); + return rem; +} + +static int get_infd_size(int fd) +{ + struct stat sb; + ssize_t count; + int err; + + err = fstat(fd, &sb); + if (err < 0) { + perror("fstat"); + return -1; + } + + if ((sb.st_mode & S_IFMT) != S_IFREG) { + fprintf(stderr, "%s: stdin is not a regular file\n", __func__); + return -2; + } + + count = sb.st_size; + if (count > INT_MAX) { + fprintf(stderr, "File too large: %zu\n", count); + return -3; + } + + return (int)count; +} + +static int do_sendfile(int infd, int outfd, unsigned int count) +{ + while (count > 0) { + ssize_t r; + + r = sendfile(outfd, infd, NULL, count); + if (r < 0) { + perror("sendfile"); + return 3; + } + + count -= r; + } + + return 0; +} + +static int copyfd_io_mmap(int infd, int peerfd, int outfd, + unsigned int size) +{ + int err; + + if (listen_mode) { + err = do_recvfile(peerfd, outfd); + if (err) + return err; + + err = do_mmap(infd, peerfd, size); + } else { + err = do_mmap(infd, peerfd, size); + if (err) + return err; + + shutdown(peerfd, SHUT_WR); + + err = do_recvfile(peerfd, outfd); + } + + return err; +} + +static int copyfd_io_sendfile(int infd, int peerfd, int outfd, + unsigned int size) +{ + int err; + + if (listen_mode) { + err = do_recvfile(peerfd, outfd); + if (err) + return err; + + err = do_sendfile(infd, peerfd, size); + } else { + err = do_sendfile(infd, peerfd, size); + if (err) + return err; + err = do_recvfile(peerfd, outfd); + } + + return err; +} + +static int copyfd_io(int infd, int peerfd, int outfd) +{ + int file_size; + + switch (cfg_mode) { + case CFG_MODE_POLL: + return copyfd_io_poll(infd, peerfd, outfd); + case CFG_MODE_MMAP: + file_size = get_infd_size(infd); + if (file_size < 0) + return file_size; + return copyfd_io_mmap(infd, peerfd, outfd, file_size); + case CFG_MODE_SENDFILE: + file_size = get_infd_size(infd); + if (file_size < 0) + return file_size; + return copyfd_io_sendfile(infd, peerfd, outfd, file_size); + } + + fprintf(stderr, "Invalid mode %d\n", cfg_mode); + + die_usage(); + return 1; +} + +static void check_sockaddr(int pf, struct sockaddr_storage *ss, + socklen_t salen) +{ + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + socklen_t wanted_size = 0; + + switch (pf) { + case AF_INET: + wanted_size = sizeof(*sin); + sin = (void *)ss; + if (!sin->sin_port) + fprintf(stderr, "accept: something wrong: ip connection from port 0"); + break; + case AF_INET6: + wanted_size = sizeof(*sin6); + sin6 = (void *)ss; + if (!sin6->sin6_port) + fprintf(stderr, "accept: something wrong: ipv6 connection from port 0"); + break; + default: + fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen); + return; + } + + if (salen != wanted_size) + fprintf(stderr, "accept: size mismatch, got %d expected %d\n", + (int)salen, wanted_size); + + if (ss->ss_family != pf) + fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n", + (int)ss->ss_family, pf); +} + +static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen) +{ + struct sockaddr_storage peerss; + socklen_t peersalen = sizeof(peerss); + + if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) { + perror("getpeername"); + return; + } + + if (peersalen != salen) { + fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen); + return; + } + + if (memcmp(ss, &peerss, peersalen)) { + char a[INET6_ADDRSTRLEN]; + char b[INET6_ADDRSTRLEN]; + char c[INET6_ADDRSTRLEN]; + char d[INET6_ADDRSTRLEN]; + + xgetnameinfo((struct sockaddr *)ss, salen, + a, sizeof(a), b, sizeof(b)); + + xgetnameinfo((struct sockaddr *)&peerss, peersalen, + c, sizeof(c), d, sizeof(d)); + + fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n", + __func__, a, c, b, d, peersalen, salen); + } +} + +static void check_getpeername_connect(int fd) +{ + struct sockaddr_storage ss; + socklen_t salen = sizeof(ss); + char a[INET6_ADDRSTRLEN]; + char b[INET6_ADDRSTRLEN]; + + if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { + perror("getpeername"); + return; + } + + xgetnameinfo((struct sockaddr *)&ss, salen, + a, sizeof(a), b, sizeof(b)); + + if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) + fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, + cfg_host, a, cfg_port, b); +} + +int main_loop_s(int listensock) +{ + struct sockaddr_storage ss; + struct pollfd polls; + socklen_t salen; + int remotesock; + + polls.fd = listensock; + polls.events = POLLIN; + + switch (poll(&polls, 1, poll_timeout)) { + case -1: + perror("poll"); + return 1; + case 0: + fprintf(stderr, "%s: timed out\n", __func__); + close(listensock); + return 2; + } + + salen = sizeof(ss); + remotesock = accept(listensock, (struct sockaddr *)&ss, &salen); + if (remotesock >= 0) { + check_sockaddr(pf, &ss, salen); + check_getpeername(remotesock, &ss, salen); + + return copyfd_io(0, remotesock, 1); + } + + perror("accept"); + + return 1; +} + +static void init_rng(void) +{ + int fd = open("/dev/urandom", O_RDONLY); + unsigned int foo; + + if (fd > 0) { + int ret = read(fd, &foo, sizeof(foo)); + + if (ret < 0) + srand(fd + foo); + close(fd); + } + + srand(foo); +} + +int main_loop(void) +{ + int fd; + + /* listener is ready. */ + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto); + if (fd < 0) + return 2; + + check_getpeername_connect(fd); + + if (cfg_sndbuf) + set_sndbuf(fd, cfg_sndbuf); + + return copyfd_io(0, fd, 1); +} + +int parse_proto(const char *proto) +{ + if (!strcasecmp(proto, "MPTCP")) + return IPPROTO_MPTCP; + if (!strcasecmp(proto, "TCP")) + return IPPROTO_TCP; + + fprintf(stderr, "Unknown protocol: %s\n.", proto); + die_usage(); + + /* silence compiler warning */ + return 0; +} + +int parse_mode(const char *mode) +{ + if (!strcasecmp(mode, "poll")) + return CFG_MODE_POLL; + if (!strcasecmp(mode, "mmap")) + return CFG_MODE_MMAP; + if (!strcasecmp(mode, "sendfile")) + return CFG_MODE_SENDFILE; + + fprintf(stderr, "Unknown test mode: %s\n", mode); + fprintf(stderr, "Supported modes are:\n"); + fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); + fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); + fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); + + die_usage(); + + /* silence compiler warning */ + return 0; +} + +int parse_sndbuf(const char *size) +{ + unsigned long s; + + errno = 0; + + s = strtoul(size, NULL, 0); + + if (errno) { + fprintf(stderr, "Invalid sndbuf size %s (%s)\n", + size, strerror(errno)); + die_usage(); + } + + if (s > INT_MAX) { + fprintf(stderr, "Invalid sndbuf size %s (%s)\n", + size, strerror(ERANGE)); + die_usage(); + } + + cfg_sndbuf = s; + + return 0; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) { + switch (c) { + case 'l': + listen_mode = true; + break; + case 'p': + cfg_port = optarg; + break; + case 's': + cfg_sock_proto = parse_proto(optarg); + break; + case 'h': + die_usage(); + break; + case 'u': + tcpulp_audit = true; + break; + case '6': + pf = AF_INET6; + break; + case 't': + poll_timeout = atoi(optarg) * 1000; + if (poll_timeout <= 0) + poll_timeout = -1; + break; + case 'm': + cfg_mode = parse_mode(optarg); + break; + case 'b': + cfg_sndbuf = parse_sndbuf(optarg); + break; + } + } + + if (optind + 1 != argc) + die_usage(); + cfg_host = argv[optind]; + + if (strchr(cfg_host, ':')) + pf = AF_INET6; +} + +int main(int argc, char *argv[]) +{ + init_rng(); + + parse_opts(argc, argv); + + if (tcpulp_audit) + return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1; + + if (listen_mode) { + int fd = sock_listen_mptcp(cfg_host, cfg_port); + + if (fd < 0) + return 1; + + if (cfg_sndbuf) + set_sndbuf(fd, cfg_sndbuf); + + return main_loop_s(fd); + } + + return main_loop(); +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh new file mode 100755 index 000000000000..d573a0feb98d --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -0,0 +1,595 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +time_start=$(date +%s) + +optstring="b:d:e:l:r:h4cm:" +ret=0 +sin="" +sout="" +cin="" +cout="" +ksft_skip=4 +capture=false +timeout=30 +ipv6=true +ethtool_random_on=true +tc_delay="$((RANDOM%400))" +tc_loss=$((RANDOM%101)) +tc_reorder="" +testmode="" +sndbuf=0 +options_log=true + +if [ $tc_loss -eq 100 ];then + tc_loss=1% +elif [ $tc_loss -ge 10 ]; then + tc_loss=0.$tc_loss% +elif [ $tc_loss -ge 1 ]; then + tc_loss=0.0$tc_loss% +else + tc_loss="" +fi + +usage() { + echo "Usage: $0 [ -a ]" + echo -e "\t-d: tc/netem delay in milliseconds, e.g. \"-d 10\" (default random)" + echo -e "\t-l: tc/netem loss percentage, e.g. \"-l 0.02\" (default random)" + echo -e "\t-r: tc/netem reorder mode, e.g. \"-r 25% 50% gap 5\", use "-r 0" to disable reordering (default random)" + echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" + echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-b: set sndbuf value (default: use kernel default)" + echo -e "\t-m: test mode (poll, sendfile; default: poll)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "d") + if [ $OPTARG -ge 0 ];then + tc_delay="$OPTARG" + else + echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "e") + ethtool_args="$ethtool_args $OPTARG off" + ethtool_random_on=false + ;; + "l") + tc_loss="$OPTARG" + ;; + "r") + tc_reorder="$OPTARG" + ;; + "4") + ipv6=false + ;; + "c") + capture=true + ;; + "b") + if [ $OPTARG -ge 0 ];then + sndbuf="$OPTARG" + else + echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "m") + testmode="$OPTARG" + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" +ns4="ns4-$rndh" + +TEST_COUNT=0 + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + rm -f "$capout" + + local netns + for netns in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +capout=$(mktemp) +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +# "$ns1" ns2 ns3 ns4 +# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3 +# - drop 1% -> reorder 25% +# <- TSO off - + +ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" +ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth2 netns "$ns3" +ip link add ns3eth4 netns "$ns3" type veth peer name ns4eth3 netns "$ns4" + +ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2 +ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad + +ip -net "$ns1" link set ns1eth2 up +ip -net "$ns1" route add default via 10.0.1.2 +ip -net "$ns1" route add default via dead:beef:1::2 + +ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 +ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad +ip -net "$ns2" link set ns2eth1 up + +ip -net "$ns2" addr add 10.0.2.1/24 dev ns2eth3 +ip -net "$ns2" addr add dead:beef:2::1/64 dev ns2eth3 nodad +ip -net "$ns2" link set ns2eth3 up +ip -net "$ns2" route add default via 10.0.2.2 +ip -net "$ns2" route add default via dead:beef:2::2 +ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1 +ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1 + +ip -net "$ns3" addr add 10.0.2.2/24 dev ns3eth2 +ip -net "$ns3" addr add dead:beef:2::2/64 dev ns3eth2 nodad +ip -net "$ns3" link set ns3eth2 up + +ip -net "$ns3" addr add 10.0.3.2/24 dev ns3eth4 +ip -net "$ns3" addr add dead:beef:3::2/64 dev ns3eth4 nodad +ip -net "$ns3" link set ns3eth4 up +ip -net "$ns3" route add default via 10.0.2.1 +ip -net "$ns3" route add default via dead:beef:2::1 +ip netns exec "$ns3" sysctl -q net.ipv4.ip_forward=1 +ip netns exec "$ns3" sysctl -q net.ipv6.conf.all.forwarding=1 + +ip -net "$ns4" addr add 10.0.3.1/24 dev ns4eth3 +ip -net "$ns4" addr add dead:beef:3::1/64 dev ns4eth3 nodad +ip -net "$ns4" link set ns4eth3 up +ip -net "$ns4" route add default via 10.0.3.2 +ip -net "$ns4" route add default via dead:beef:3::2 + +set_ethtool_flags() { + local ns="$1" + local dev="$2" + local flags="$3" + + ip netns exec $ns ethtool -K $dev $flags 2>/dev/null + [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" +} + +set_random_ethtool_flags() { + local flags="" + local r=$RANDOM + + local pick1=$((r & 1)) + local pick2=$((r & 2)) + local pick3=$((r & 4)) + + [ $pick1 -ne 0 ] && flags="tso off" + [ $pick2 -ne 0 ] && flags="$flags gso off" + [ $pick3 -ne 0 ] && flags="$flags gro off" + + [ -z "$flags" ] && return + + set_ethtool_flags "$1" "$2" "$flags" +} + +if $ethtool_random_on;then + set_random_ethtool_flags "$ns3" ns3eth2 + set_random_ethtool_flags "$ns4" ns4eth3 +else + set_ethtool_flags "$ns3" ns3eth2 "$ethtool_args" + set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" +fi + +print_file_err() +{ + ls -l "$1" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "$1" +} + +check_transfer() +{ + local in=$1 + local out=$2 + local what=$3 + + cmp "$in" "$out" > /dev/null 2>&1 + if [ $? -ne 0 ] ;then + echo "[ FAIL ] $what does not match (in, out):" + print_file_err "$in" + print_file_err "$out" + + return 1 + fi + + return 0 +} + +check_mptcp_disabled() +{ + local disabled_ns + disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)" + ip netns add ${disabled_ns} || exit $ksft_skip + + # net.mptcp.enabled should be enabled by default + if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then + echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]" + ret=1 + return 1 + fi + ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0 + + local err=0 + LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \ + grep -q "^socket: Protocol not available$" && err=1 + ip netns delete ${disabled_ns} + + if [ ${err} -eq 0 ]; then + echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]" + ret=1 + return 1 + fi + + echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]" + return 0 +} + +check_mptcp_ulp_setsockopt() +{ + local t retval + t="ns_ulp-$sech-$(mktemp -u XXXXXX)" + + ip netns add ${t} || exit $ksft_skip + if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then + printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n" + retval=1 + ret=$retval + else + printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n" + retval=0 + fi + ip netns del ${t} + return $retval +} + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + local ping_args="-q -c 1" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + + return 1 + fi + + return 0 +} + +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local local_addr="$6" + local extra_args="" + + local port + port=$((10000+$TEST_COUNT)) + TEST_COUNT=$((TEST_COUNT+1)) + + if [ "$sndbuf" -gt 0 ]; then + extra_args="$extra_args -b $sndbuf" + fi + + if [ -n "$testmode" ]; then + extra_args="$extra_args -m $testmode" + fi + + if [ -n "$extra_args" ] && $options_log; then + options_log=false + echo "INFO: extra options: $extra_args" + fi + + :> "$cout" + :> "$sout" + :> "$capout" + + local addr_port + addr_port=$(printf "%s:%d" ${connect_addr} ${port}) + printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto} + + if $capture; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap" + + ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + local cappid=$! + + sleep 1 + fi + + ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + local spid=$! + + wait_local_port_listen "${listener_ns}" "${port}" + + local start + start=$(date +%s%3N) + ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + local cpid=$! + + wait $cpid + local retc=$? + wait $spid + local rets=$? + + local stop + stop=$(date +%s%3N) + + if $capture; then + sleep 1 + kill $cappid + fi + + local duration + duration=$((stop-start)) + duration=$(printf "(duration %05sms)" $duration) + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 + echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + + cat "$capout" + return 1 + fi + + check_transfer $sin $cout "file received by client" + retc=$? + check_transfer $cin $sout "file received by server" + rets=$? + + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + echo "$duration [ OK ]" + cat "$capout" + return 0 + fi + + cat "$capout" + return 1 +} + +make_file() +{ + local name=$1 + local who=$2 + + local SIZE TSIZE + SIZE=$((RANDOM % (1024 * 8))) + TSIZE=$((SIZE * 1024)) + + dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + + SIZE=$((RANDOM % 1024)) + SIZE=$((SIZE + 128)) + TSIZE=$((TSIZE + SIZE)) + dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + + echo "Created $name (size $TSIZE) containing data sent by $who" +} + +run_tests_lo() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + local loopback="$4" + local lret=0 + + # skip if test programs are running inside same netns for subsequent runs. + if [ $loopback -eq 0 ] && [ ${listener_ns} = ${connector_ns} ]; then + return 0 + fi + + # skip if we don't want v6 + if ! $ipv6 && is_v6 "${connect_addr}"; then + return 0 + fi + + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + else + local_addr="0.0.0.0" + fi + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + + # don't bother testing fallback tcp except for loopback case. + if [ ${listener_ns} != ${connector_ns} ]; then + return 0 + fi + + do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + + do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + + return 0 +} + +run_tests() +{ + run_tests_lo $1 $2 $3 0 +} + +make_file "$cin" "client" +make_file "$sin" "server" + +check_mptcp_disabled + +check_mptcp_ulp_setsockopt + +echo "INFO: validating network environment with pings" +for sender in "$ns1" "$ns2" "$ns3" "$ns4";do + do_ping "$ns1" $sender 10.0.1.1 + do_ping "$ns1" $sender dead:beef:1::1 + + do_ping "$ns2" $sender 10.0.1.2 + do_ping "$ns2" $sender dead:beef:1::2 + do_ping "$ns2" $sender 10.0.2.1 + do_ping "$ns2" $sender dead:beef:2::1 + + do_ping "$ns3" $sender 10.0.2.2 + do_ping "$ns3" $sender dead:beef:2::2 + do_ping "$ns3" $sender 10.0.3.2 + do_ping "$ns3" $sender dead:beef:3::2 + + do_ping "$ns4" $sender 10.0.3.1 + do_ping "$ns4" $sender dead:beef:3::1 +done + +[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss +echo -n "INFO: Using loss of $tc_loss " +test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " + +if [ -z "${tc_reorder}" ]; then + reorder1=$((RANDOM%10)) + reorder1=$((100 - reorder1)) + reorder2=$((RANDOM%100)) + + if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then + tc_reorder="reorder ${reorder1}% ${reorder2}%" + echo -n "$tc_reorder " + fi +elif [ "$tc_reorder" = "0" ];then + tc_reorder="" +elif [ "$tc_delay" -gt 0 ];then + # reordering requires some delay + tc_reorder="reorder $tc_reorder" + echo -n "$tc_reorder " +fi + +echo "on ns3eth4" + +tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder + +for sender in $ns1 $ns2 $ns3 $ns4;do + run_tests_lo "$ns1" "$sender" 10.0.1.1 1 + if [ $ret -ne 0 ] ;then + echo "FAIL: Could not even run loopback test" 1>&2 + exit $ret + fi + run_tests_lo "$ns1" $sender dead:beef:1::1 1 + if [ $ret -ne 0 ] ;then + echo "FAIL: Could not even run loopback v6 test" 2>&1 + exit $ret + fi + + run_tests "$ns2" $sender 10.0.1.2 + run_tests "$ns2" $sender dead:beef:1::2 + run_tests "$ns2" $sender 10.0.2.1 + run_tests "$ns2" $sender dead:beef:2::1 + + run_tests "$ns3" $sender 10.0.2.2 + run_tests "$ns3" $sender dead:beef:2::2 + run_tests "$ns3" $sender 10.0.3.2 + run_tests "$ns3" $sender dead:beef:3::2 + + run_tests "$ns4" $sender 10.0.3.1 + run_tests "$ns4" $sender dead:beef:3::1 +done + +time_end=$(date +%s) +time_run=$((time_end-time_start)) + +echo "Time: ${time_run} seconds" + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings new file mode 100644 index 000000000000..026384c189c9 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/settings @@ -0,0 +1 @@ +timeout=450 diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index de1032b5ddea..08194aa44006 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,7 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ + nft_concat_range.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh new file mode 100755 index 000000000000..aca21dde102a --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -0,0 +1,1481 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# nft_concat_range.sh - Tests for sets with concatenation of ranged fields +# +# Copyright (c) 2019 Red Hat GmbH +# +# Author: Stefano Brivio <sbrivio@redhat.com> +# +# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031 +# ^ Configuration and templates sourced with eval, counters reused in subshells + +KSELFTEST_SKIP=4 + +# Available test groups: +# - correctness: check that packets match given entries, and only those +# - concurrency: attempt races between insertion, deletion and lookup +# - timeout: check that packets match entries until they expire +# - performance: estimate matching rate, compare with rbtree and hash baselines +TESTS="correctness concurrency timeout" +[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance" + +# Set types, defined by TYPE_ variables below +TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto + net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port + net_port_mac_proto_net" + +# List of possible paths to pktgen script from kernel tree for performance tests +PKTGEN_SCRIPT_PATHS=" + ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + pktgen/pktgen_bench_xmit_mode_netif_receive.sh" + +# Definition of set types: +# display display text for test report +# type_spec nftables set type specifier +# chain_spec nftables type specifier for rules mapping to set +# dst call sequence of format_*() functions for destination fields +# src call sequence of format_*() functions for source fields +# start initial integer used to generate addresses and ports +# count count of entries to generate and match +# src_delta number summed to destination generator for source fields +# tools list of tools for correctness and timeout tests, any can be used +# proto L4 protocol of test packets +# +# race_repeat race attempts per thread, 0 disables concurrency test for type +# flood_tools list of tools for concurrency tests, any can be used +# flood_proto L4 protocol of test packets for concurrency tests +# flood_spec nftables type specifier for concurrency tests +# +# perf_duration duration of single pktgen injection test +# perf_spec nftables type specifier for performance tests +# perf_dst format_*() functions for destination fields in performance test +# perf_src format_*() functions for source fields in performance test +# perf_entries number of set entries for performance test +# perf_proto L3 protocol of test packets +TYPE_net_port=" +display net,port +type_spec ipv4_addr . inet_service +chain_spec ip daddr . udp dport +dst addr4 port +src +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto udp +flood_spec ip daddr . udp dport + +perf_duration 5 +perf_spec ip daddr . udp dport +perf_dst addr4 port +perf_src +perf_entries 1000 +perf_proto ipv4 +" + +TYPE_port_net=" +display port,net +type_spec inet_service . ipv4_addr +chain_spec udp dport . ip daddr +dst port addr4 +src +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto udp +flood_spec udp dport . ip daddr + +perf_duration 5 +perf_spec udp dport . ip daddr +perf_dst port addr4 +perf_src +perf_entries 100 +perf_proto ipv4 +" + +TYPE_net6_port=" +display net6,port +type_spec ipv6_addr . inet_service +chain_spec ip6 daddr . udp dport +dst addr6 port +src +start 10 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp6 + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp6 +flood_spec ip6 daddr . udp dport + +perf_duration 5 +perf_spec ip6 daddr . udp dport +perf_dst addr6 port +perf_src +perf_entries 1000 +perf_proto ipv6 +" + +TYPE_port_proto=" +display port,proto +type_spec inet_service . inet_proto +chain_spec udp dport . meta l4proto +dst port proto +src +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 5 +perf_spec udp dport . meta l4proto +perf_dst port proto +perf_src +perf_entries 30000 +perf_proto ipv4 +" + +TYPE_net6_port_mac=" +display net6,port,mac +type_spec ipv6_addr . inet_service . ether_addr +chain_spec ip6 daddr . udp dport . ether saddr +dst addr6 port +src mac +start 10 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp6 + +race_repeat 0 + +perf_duration 5 +perf_spec ip6 daddr . udp dport . ether daddr +perf_dst addr6 port mac +perf_src +perf_entries 10 +perf_proto ipv6 +" + +TYPE_net6_port_mac_proto=" +display net6,port,mac,proto +type_spec ipv6_addr . inet_service . ether_addr . inet_proto +chain_spec ip6 daddr . udp dport . ether saddr . meta l4proto +dst addr6 port +src mac proto +start 10 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp6 + +race_repeat 0 + +perf_duration 5 +perf_spec ip6 daddr . udp dport . ether daddr . meta l4proto +perf_dst addr6 port mac proto +perf_src +perf_entries 1000 +perf_proto ipv6 +" + +TYPE_net_port_net=" +display net,port,net +type_spec ipv4_addr . inet_service . ipv4_addr +chain_spec ip daddr . udp dport . ip saddr +dst addr4 port +src addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp +flood_spec ip daddr . udp dport . ip saddr + +perf_duration 0 +" + +TYPE_net6_port_net6_port=" +display net6,port,net6,port +type_spec ipv6_addr . inet_service . ipv6_addr . inet_service +chain_spec ip6 daddr . udp dport . ip6 saddr . udp sport +dst addr6 port +src addr6 port +start 10 +count 5 +src_delta 2000 +tools sendip nc +proto udp6 + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp6 +flood_spec ip6 daddr . tcp dport . ip6 saddr . tcp sport + +perf_duration 0 +" + +TYPE_net_port_mac_proto_net=" +display net,port,mac,proto,net +type_spec ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr +chain_spec ip daddr . udp dport . ether saddr . meta l4proto . ip saddr +dst addr4 port +src mac proto addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_mac=" +display net,mac +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 5 +perf_spec ip daddr . ether daddr +perf_dst addr4 mac +perf_src +perf_entries 1000 +perf_proto ipv4 +" + +TYPE_net_mac_icmp=" +display net,mac - ICMP +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 5 +src_delta 2000 +tools ping +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net6_mac_icmp=" +display net6,mac - ICMPv6 +type_spec ipv6_addr . ether_addr +chain_spec ip6 daddr . ether saddr +dst addr6 +src mac +start 10 +count 50 +src_delta 2000 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_port_proto_net=" +display net,port,proto,net +type_spec ipv4_addr . inet_service . inet_proto . ipv4_addr +chain_spec ip daddr . udp dport . meta l4proto . ip saddr +dst addr4 port proto +src addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp +flood_spec ip daddr . tcp dport . meta l4proto . ip saddr + +perf_duration 0 +" + +# Set template for all tests, types and rules are filled in depending on test +set_template=' +flush ruleset + +table inet filter { + counter test { + packets 0 bytes 0 + } + + set test { + type ${type_spec} + flags interval,timeout + } + + chain input { + type filter hook prerouting priority 0; policy accept; + ${chain_spec} @test counter name \"test\" + } +} + +table netdev perf { + counter test { + packets 0 bytes 0 + } + + counter match { + packets 0 bytes 0 + } + + set test { + type ${type_spec} + flags interval + } + + set norange { + type ${type_spec} + } + + set noconcat { + type ${type_spec%% *} + flags interval + } + + chain test { + type filter hook ingress device veth_a priority 0; + } +} +' + +err_buf= +info_buf= + +# Append string to error buffer +err() { + err_buf="${err_buf}${1} +" +} + +# Append string to information buffer +info() { + info_buf="${info_buf}${1} +" +} + +# Flush error buffer to stdout +err_flush() { + printf "%s" "${err_buf}" + err_buf= +} + +# Flush information buffer to stdout +info_flush() { + printf "%s" "${info_buf}" + info_buf= +} + +# Setup veth pair: this namespace receives traffic, B generates it +setup_veth() { + ip netns add B + ip link add veth_a type veth peer name veth_b || return 1 + + ip link set veth_a up + ip link set veth_b netns B + + ip -n B link set veth_b up + + ip addr add dev veth_a 10.0.0.1 + ip route add default dev veth_a + + ip -6 addr add fe80::1/64 dev veth_a nodad + ip -6 addr add 2001:db8::1/64 dev veth_a nodad + ip -6 route add default dev veth_a + + ip -n B route add default dev veth_b + + ip -6 -n B addr add fe80::2/64 dev veth_b nodad + ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad + ip -6 -n B route add default dev veth_b + + B() { + ip netns exec B "$@" >/dev/null 2>&1 + } + + sleep 2 +} + +# Fill in set template and initialise set +setup_set() { + eval "echo \"${set_template}\"" | nft -f - +} + +# Check that at least one of the needed tools is available +check_tools() { + __tools= + for tool in ${tools}; do + if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \ + ! nc -u -w0 1.1.1.1 1 2>/dev/null; then + # Some GNU netcat builds might not support IPv6 + __tools="${__tools} netcat-openbsd" + continue + fi + __tools="${__tools} ${tool}" + + command -v "${tool}" >/dev/null && return 0 + done + err "need one of:${__tools}, skipping" && return 1 +} + +# Set up function to send ICMP packets +setup_send_icmp() { + send_icmp() { + B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1 + } +} + +# Set up function to send ICMPv6 packets +setup_send_icmp6() { + if command -v ping6 >/dev/null; then + send_icmp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ping6 -q -c1 -W1 "${dst_addr6}" + } + else + send_icmp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ping -q -6 -c1 -W1 "${dst_addr6}" + } + fi +} + +# Set up function to send single UDP packets on IPv4 +setup_send_udp() { + if command -v sendip >/dev/null; then + send_udp() { + [ -n "${src_port}" ] && src_port="-us ${src_port}" + [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}" + [ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}" + + # shellcheck disable=SC2086 # sendip needs split options + B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \ + ${dst_port} "${dst_addr4}" + + src_port= + dst_port= + src_addr4= + } + elif command -v nc >/dev/null; then + if nc -u -w0 1.1.1.1 1 2>/dev/null; then + # OpenBSD netcat + nc_opt="-w0" + else + # GNU netcat + nc_opt="-q0" + fi + + send_udp() { + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}" dev veth_b + __src_addr4="-s ${src_addr4}" + fi + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + [ -n "${src_port}" ] && src_port="-p ${src_port}" + + echo "" | B nc -u "${nc_opt}" "${__src_addr4}" \ + "${src_port}" "${dst_addr4}" "${dst_port}" + + src_addr4= + src_port= + } + elif [ -z "$(bash -c 'type -p')" ]; then + send_udp() { + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + B ip route add default dev veth_b + fi + + B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}" + + if [ -n "${src_addr4}" ]; then + B ip addr del "${src_addr4}/16" dev veth_b + fi + src_addr4= + } + else + return 1 + fi +} + +# Set up function to send single UDP packets on IPv6 +setup_send_udp6() { + if command -v sendip >/dev/null; then + send_udp6() { + [ -n "${src_port}" ] && src_port="-us ${src_port}" + [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}" + if [ -n "${src_addr6}" ]; then + src_addr6="-6s ${src_addr6}" + else + src_addr6="-6s 2001:db8::2" + fi + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \ + ${dst_port} "${dst_addr6}" + + src_port= + dst_port= + src_addr6= + } + elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then + # GNU netcat might not work with IPv6, try next tool + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + else + src_addr6="2001:db8::2" + fi + [ -n "${src_port}" ] && src_port="-p ${src_port}" + + # shellcheck disable=SC2086 # this needs split options + echo "" | B nc -u w0 "-s${src_addr6}" ${src_port} \ + ${dst_addr6} ${dst_port} + + src_addr6= + src_port= + } + elif [ -z "$(bash -c 'type -p')" ]; then + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ip addr add "${src_addr6}" dev veth_b nodad + B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}" + ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null + } + else + return 1 + fi +} + +# Set up function to send TCP traffic on IPv4 +setup_flood_tcp() { + if command -v iperf3 >/dev/null; then + flood_tcp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b 2>/dev/null + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \ + ${src_addr4} -l16 -t 1000 + + src_addr4= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_tcp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 2>/dev/null + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_addr4="${src_addr4}:${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \ + -l20 -t 1000 + + src_addr4= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_tcp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="10.0.0.2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -4 ${dst_port} -L "${dst_addr4}" \ + >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B netperf -4 -H "${dst_addr4}" ${dst_port} \ + -L "${src_addr4}" -l 1000 -t TCP_STREAM + + src_addr4= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Set up function to send TCP traffic on IPv6 +setup_flood_tcp6() { + if command -v iperf3 >/dev/null; then + flood_tcp6() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + src_addr6="-B ${src_addr6}" + else + src_addr6="-B 2001:db8::2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -c "${dst_addr6}" ${dst_port} \ + ${src_port} ${src_addr6} -l16 -t 1000 + + src_addr6= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_tcp6() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + src_addr6="-B ${src_addr6}" + else + src_addr6="-B 2001:db8::2" + fi + if [ -n "${src_port}" ]; then + src_addr6="${src_addr6}:${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf -c "${dst_addr6}" -V ${dst_port} \ + ${src_addr6} -l1 -t 1000 + + src_addr6= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_tcp6() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + else + src_addr6="2001:db8::2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -6 ${dst_port} -L "${dst_addr6}" \ + >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B netperf -6 -H "${dst_addr6}" ${dst_port} \ + -L "${src_addr6}" -l 1000 -t TCP_STREAM + + src_addr6= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Set up function to send UDP traffic on IPv4 +setup_flood_udp() { + if command -v iperf3 >/dev/null; then + flood_udp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 2>/dev/null + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr4}" ${dst_port} + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \ + ${dst_port} ${src_port} ${src_addr4} + + src_addr4= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_udp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_addr4="${src_addr4}:${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \ + ${dst_port} ${src_addr4} + + src_addr4= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_udp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="10.0.0.2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -4 ${dst_port} -L "${dst_addr4}" \ + >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B netperf -4 -H "${dst_addr4}" ${dst_port} \ + -L "${src_addr4}" -l 1000 -t UDP_STREAM + + src_addr4= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Find pktgen script and set up function to start pktgen injection +setup_perf() { + for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do + command -v "${pktgen_script_path}" >/dev/null && break + done + [ "${pktgen_script_path}" = "__notfound" ] && return 1 + + perf_ipv4() { + ${pktgen_script_path} -s80 \ + -i veth_a -d "${dst_addr4}" -p "${dst_port}" \ + -m "${dst_mac}" \ + -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null & + perf_pid=$! + } + perf_ipv6() { + IP6=6 ${pktgen_script_path} -s100 \ + -i veth_a -d "${dst_addr6}" -p "${dst_port}" \ + -m "${dst_mac}" \ + -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null & + perf_pid=$! + } +} + +# Clean up before each test +cleanup() { + nft reset counter inet filter test >/dev/null 2>&1 + nft flush ruleset >/dev/null 2>&1 + ip link del dummy0 2>/dev/null + ip route del default 2>/dev/null + ip -6 route del default 2>/dev/null + ip netns del B 2>/dev/null + ip link del veth_a 2>/dev/null + timeout= + killall iperf3 2>/dev/null + killall iperf 2>/dev/null + killall netperf 2>/dev/null + killall netserver 2>/dev/null + rm -f ${tmp} + sleep 2 +} + +# Entry point for setup functions +setup() { + if [ "$(id -u)" -ne 0 ]; then + echo " need to run as root" + exit ${KSELFTEST_SKIP} + fi + + cleanup + check_tools || return 1 + for arg do + if ! eval setup_"${arg}"; then + err " ${arg} not supported" + return 1 + fi + done +} + +# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it +format_addr4() { + a=$((${1} + 16777216 * 10 + 5)) + printf "%i.%i.%i.%i" \ + "$((a / 16777216))" "$((a % 16777216 / 65536))" \ + "$((a % 65536 / 256))" "$((a % 256))" +} + +# Format integer into IPv6 address, summing 2001:db8:: to it +format_addr6() { + printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))" +} + +# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it +format_mac() { + printf "00:01:%02x:%02x:%02x:%02x" \ + "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))" \ + "$((${1} % 65536 / 256))" "$((${1} % 256))" +} + +# Format integer into port, avoid 0 port +format_port() { + printf "%i" "$((${1} % 65534 + 1))" +} + +# Drop suffixed '6' from L4 protocol, if any +format_proto() { + printf "%s" "${proto}" | tr -d 6 +} + +# Format destination and source fields into nft concatenated type +format() { + __start= + __end= + __expr="{ " + + for f in ${dst}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + + __start="$(eval format_"${f}" "${start}")" + __end="$(eval format_"${f}" "${end}")" + + if [ "${f}" = "proto" ]; then + __expr="${__expr}${__start}" + else + __expr="${__expr}${__start}-${__end}" + fi + done + for f in ${src}; do + __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" + __end="$(eval format_"${f}" "${srcend}")" + + if [ "${f}" = "proto" ]; then + __expr="${__expr}${__start}" + else + __expr="${__expr}${__start}-${__end}" + fi + done + + if [ -n "${timeout}" ]; then + echo "${__expr} timeout ${timeout}s }" + else + echo "${__expr} }" + fi +} + +# Format destination and source fields into nft type, start element only +format_norange() { + __expr="{ " + + for f in ${dst}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + + __expr="${__expr}$(eval format_"${f}" "${start}")" + done + for f in ${src}; do + __expr="${__expr} . $(eval format_"${f}" "${start}")" + done + + echo "${__expr} }" +} + +# Format first destination field into nft type +format_noconcat() { + for f in ${dst}; do + __start="$(eval format_"${f}" "${start}")" + __end="$(eval format_"${f}" "${end}")" + + if [ "${f}" = "proto" ]; then + echo "{ ${__start} }" + else + echo "{ ${__start}-${__end} }" + fi + return + done +} + +# Add single entry to 'test' set in 'inet filter' table +add() { + if ! nft add element inet filter test "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Format and output entries for sets in 'netdev perf' table +add_perf() { + if [ "${1}" = "test" ]; then + echo "add element netdev perf test $(format)" + elif [ "${1}" = "norange" ]; then + echo "add element netdev perf norange $(format_norange)" + elif [ "${1}" = "noconcat" ]; then + echo "add element netdev perf noconcat $(format_noconcat)" + fi +} + +# Add single entry to 'norange' set in 'netdev perf' table +add_perf_norange() { + if ! nft add element netdev perf norange "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Add single entry to 'noconcat' set in 'netdev perf' table +add_perf_noconcat() { + if ! nft add element netdev perf noconcat "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Delete single entry from set +del() { + if ! nft delete element inet filter test "${1}"; then + err "Failed to delete ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Return packet count from 'test' counter in 'inet filter' table +count_packets() { + found=0 + for token in $(nft list counter inet filter test); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Return packet count from 'test' counter in 'netdev perf' table +count_perf_packets() { + found=0 + for token in $(nft list counter netdev perf test); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Set MAC addresses, send traffic according to specifier +flood() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval flood_\$proto +} + +# Set MAC addresses, start pktgen injection +perf() { + dst_mac="$(format_mac "${1}")" + ip link set veth_a address "${dst_mac}" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval perf_\$perf_proto +} + +# Set MAC addresses, send single packet, check that it matches, reset counter +send_match() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval send_\$proto + if [ "$(count_packets)" != "1" ]; then + err "${proto} packet to:" + err " $(for f in ${dst}; do + eval format_\$f "${1}"; printf ' '; done)" + err "from:" + err " $(for f in ${src}; do + eval format_\$f "${2}"; printf ' '; done)" + err "should have matched ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi + nft reset counter inet filter test >/dev/null +} + +# Set MAC addresses, send single packet, check that it doesn't match +send_nomatch() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval send_\$proto + if [ "$(count_packets)" != "0" ]; then + err "${proto} packet to:" + err " $(for f in ${dst}; do + eval format_\$f "${1}"; printf ' '; done)" + err "from:" + err " $(for f in ${src}; do + eval format_\$f "${2}"; printf ' '; done)" + err "should not have matched ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Correctness test template: +# - add ranged element, check that packets match it +# - check that packets outside range don't match it +# - remove some elements, check that packets don't match anymore +test_correctness() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + + # Delete elements now and then + if [ $((i % 3)) -eq 0 ]; then + del "$(format)" || return 1 + for j in $(seq ${start} \ + $((range_size / 2 + 1)) ${end}); do + send_nomatch "${j}" $((j + src_delta)) \ + || return 1 + done + fi + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done +} + +# Concurrency test template: +# - add all the elements +# - start a thread for each physical thread that: +# - adds all the elements +# - flushes the set +# - adds all the elements +# - flushes the entire ruleset +# - adds the set back +# - adds all the elements +# - delete all the elements +test_concurrency() { + proto=${flood_proto} + tools=${flood_tools} + chain_spec=${flood_spec} + setup veth flood_"${proto}" set || return ${KSELFTEST_SKIP} + + range_size=1 + cstart=${start} + flood_pids= + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + + flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!" + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + sleep 10 + + pids= + for c in $(seq 1 "$(nproc)"); do ( + for r in $(seq 1 "${race_repeat}"); do + range_size=1 + + # $start needs to be local to this subshell + # shellcheck disable=SC2030 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush inet filter test 2>/dev/null + + range_size=1 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset + setup set 2>/dev/null + + range_size=1 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + range_size=1 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + del "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + done + ) & pids="${pids} $!" + done + + # shellcheck disable=SC2046,SC2086 # word splitting wanted here + wait $(for pid in ${pids}; do echo ${pid}; done) + # shellcheck disable=SC2046,SC2086 + kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null + # shellcheck disable=SC2046,SC2086 + wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null + + return 0 +} + +# Timeout test template: +# - add all the elements with 3s timeout while checking that packets match +# - wait 3s after the last insertion, check that packets don't match any entry +test_timeout() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + + timeout=3 + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + sleep 3 + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_nomatch "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done +} + +# Performance test template: +# - add concatenated ranged entries +# - add non-ranged concatenated entries (for hash set matching rate baseline) +# - add ranged entries with first field only (for rbhash baseline) +# - start pktgen injection directly on device rx path of this namespace +# - measure drop only rate, hash and rbtree baselines, then matching rate +test_performance() { + chain_spec=${perf_spec} + dst="${perf_dst}" + src="${perf_src}" + setup veth perf set || return ${KSELFTEST_SKIP} + + first=${start} + range_size=1 + for set in test norange noconcat; do + start=${first} + for i in $(seq ${start} $((start + perf_entries))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + elif [ ${start} -eq ${end} ]; then + end=$((start + 1)) + fi + + add_perf ${set} + + start=$((end + range_size)) + done > "${tmp}" + nft -f "${tmp}" + done + + perf $((end - 1)) ${srcstart} + + sleep 2 + + nft add rule netdev perf test counter name \"test\" drop + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline (drop from netdev hook): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec} @norange \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline hash (non-ranged entries): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec%%. *} @noconcat \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline rbtree (match on first field only): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec} @test \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + p5="$(printf %5s "${perf_entries}")" + info " set with ${p5} full, ranged entries: ${pps}pps" + kill "${perf_pid}" +} + +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } +tmp="$(mktemp)" +trap cleanup EXIT + +# Entry point for test runs +passed=0 +for name in ${TESTS}; do + printf "TEST: %s\n" "${name}" + for type in ${TYPES}; do + eval desc=\$TYPE_"${type}" + IFS=' +' + for __line in ${desc}; do + # shellcheck disable=SC2086 + eval ${__line%% *}=\"${__line##* }\"; + done + IFS=' +' + + if [ "${name}" = "concurrency" ] && \ + [ "${race_repeat}" = "0" ]; then + continue + fi + if [ "${name}" = "performance" ] && \ + [ "${perf_duration}" = "0" ]; then + continue + fi + + printf " %-60s " "${display}" + eval test_"${name}" + ret=$? + + if [ $ret -eq 0 ]; then + printf "[ OK ]\n" + info_flush + passed=$((passed + 1)) + elif [ $ret -eq 1 ]; then + printf "[FAIL]\n" + err_flush + exit 1 + elif [ $ret -eq ${KSELFTEST_SKIP} ]; then + printf "[SKIP]\n" + err_flush + fi + done +done + +[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} |