From 8c5d71d96379e80c7c0d0fa7186c04f4deb04f16 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:18 +0000 Subject: selftests/bpf: Add test for libbpf_bpf_prog_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a test for libbpf_bpf_prog_type_str. The test retrieves all variants of the bpf_prog_type enumeration using BTF and makes sure that the function under test works as expected for them. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220523230428.3077108-3-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/libbpf_str.c | 57 ++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/libbpf_str.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c new file mode 100644 index 000000000000..42696aaebf3e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + +/* + * Utility function uppercasing an entire string. + */ +static void uppercase(char *s) +{ + for (; *s != '\0'; s++) + *s = toupper(*s); +} + +/* + * Test case to check that all bpf_prog_type variants are covered by + * libbpf_bpf_prog_type_str. 
+ */ +void test_libbpf_bpf_prog_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_prog_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_prog_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val; + const char *prog_type_name; + const char *prog_type_str; + char buf[256]; + + prog_type_name = btf__str_by_offset(btf, e->name_off); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + ASSERT_OK_PTR(prog_type_str, prog_type_name); + + snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, prog_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} -- cgit v1.2.3-59-g8ed1b From b700eeb406a6c1f4d955242e06151f11f13d3e29 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:19 +0000 Subject: bpftool: Use libbpf_bpf_prog_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change switches bpftool over to using the recently introduced libbpf_bpf_prog_type_str function instead of maintaining its own string representation for the bpf_prog_type enum. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220523230428.3077108-4-deso@posteo.net --- tools/bpf/bpftool/feature.c | 57 ++++++++++++++-------- tools/bpf/bpftool/link.c | 19 +++++--- tools/bpf/bpftool/main.h | 3 -- tools/bpf/bpftool/map.c | 13 +++-- tools/bpf/bpftool/prog.c | 51 ++++--------------- .../selftests/bpf/test_bpftool_synctypes.py | 14 +----- 6 files changed, 65 insertions(+), 92 deletions(-) (limited to 'tools/testing') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index d12f46051aac..02753f934ed3 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -548,8 +548,8 @@ static bool probe_prog_type_ifindex(enum bpf_prog_type prog_type, __u32 ifindex) } static void -probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, - const char *define_prefix, __u32 ifindex) +probe_prog_type(enum bpf_prog_type prog_type, const char *prog_type_str, + bool *supported_types, const char *define_prefix, __u32 ifindex) { char feat_name[128], plain_desc[128], define_name[128]; const char *plain_comment = "eBPF program_type "; @@ -580,20 +580,16 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types, supported_types[prog_type] |= res; - if (!prog_type_name[prog_type]) { - p_info("program type name not found (type %d)", prog_type); - return; - } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; - if (strlen(prog_type_name[prog_type]) > maxlen) { + if (strlen(prog_type_str) > maxlen) { p_info("program type name too long"); return; } - sprintf(feat_name, "have_%s_prog_type", prog_type_name[prog_type]); - sprintf(define_name, "%s_prog_type", prog_type_name[prog_type]); + sprintf(feat_name, "have_%s_prog_type", prog_type_str); + sprintf(define_name, "%s_prog_type", prog_type_str); uppercase(define_name, sizeof(define_name)); - sprintf(plain_desc, "%s%s", plain_comment, prog_type_name[prog_type]); + 
sprintf(plain_desc, "%s%s", plain_comment, prog_type_str); print_bool_feature(feat_name, plain_desc, define_name, res, define_prefix); } @@ -728,10 +724,10 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } static void -probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, +probe_helpers_for_progtype(enum bpf_prog_type prog_type, + const char *prog_type_str, bool supported_type, const char *define_prefix, __u32 ifindex) { - const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; bool probe_res = false; @@ -747,12 +743,12 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } if (json_output) { - sprintf(feat_name, "%s_available_helpers", ptype_name); + sprintf(feat_name, "%s_available_helpers", prog_type_str); jsonw_name(json_wtr, feat_name); jsonw_start_array(json_wtr); } else if (!define_prefix) { printf("eBPF helpers supported for program type %s:", - ptype_name); + prog_type_str); } for (id = 1; id < ARRAY_SIZE(helper_name); id++) { @@ -768,7 +764,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, /* fallthrough */ default: probe_res |= probe_helper_for_progtype(prog_type, supported_type, - define_prefix, id, ptype_name, + define_prefix, id, prog_type_str, ifindex); } } @@ -943,15 +939,24 @@ static void section_program_types(bool *supported_types, const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int prog_type = BPF_PROG_TYPE_UNSPEC; + const char *prog_type_str; print_start_section("program_types", "Scanning eBPF program types...", "/*** eBPF program types ***/", define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) - probe_prog_type(i, supported_types, define_prefix, ifindex); + while (true) { + prog_type++; + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (!prog_type_str) + break; + + probe_prog_type(prog_type, prog_type_str, supported_types, define_prefix, + ifindex); + } print_end_section(); } @@ -974,7 +979,8 @@ static void section_map_types(const char *define_prefix, __u32 ifindex) static void section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int prog_type = BPF_PROG_TYPE_UNSPEC; + const char *prog_type_str; print_start_section("helpers", "Scanning eBPF helper functions...", @@ -996,9 +1002,18 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", define_prefix, define_prefix, define_prefix, define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) - probe_helpers_for_progtype(i, supported_types[i], define_prefix, + while (true) { + prog_type++; + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + /* libbpf will return NULL for variants unknown to it. */ + if (!prog_type_str) + break; + + probe_helpers_for_progtype(prog_type, prog_type_str, + supported_types[prog_type], + define_prefix, ifindex); + } print_end_section(); } diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 6353a789322b..e27108489604 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -121,6 +121,7 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info) static int show_link_close_json(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; + const char *prog_type_str; int err; jsonw_start_object(json_wtr); @@ -137,12 +138,12 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < prog_type_name_size) - jsonw_string_field(json_wtr, "prog_type", - prog_type_name[prog_info.type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_info.type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (prog_type_str) + jsonw_string_field(json_wtr, "prog_type", prog_type_str); else - jsonw_uint_field(json_wtr, "prog_type", - prog_info.type); + jsonw_uint_field(json_wtr, "prog_type", prog_info.type); show_link_attach_type_json(info->tracing.attach_type, json_wtr); @@ -214,6 +215,7 @@ static void show_iter_plain(struct bpf_link_info *info) static int show_link_close_plain(int fd, struct bpf_link_info *info) { struct bpf_prog_info prog_info; + const char *prog_type_str; int err; show_link_header_plain(info); @@ -228,9 +230,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < prog_type_name_size) - printf("\n\tprog_type %s ", - prog_type_name[prog_info.type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_info.type); + /* libbpf will return NULL for variants unknown to it. */ + if (prog_type_str) + printf("\n\tprog_type %s ", prog_type_str); else printf("\n\tprog_type %u ", prog_info.type); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index aa99ffab451a..74204d0e33cf 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -63,9 +63,6 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_LINK \ "LINK := { id LINK_ID | pinned FILE }" -extern const char * const prog_type_name[]; -extern const size_t prog_type_name_size; - extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE]; extern const char * const map_type_name[]; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 877387ef79c7..70a1fd5253da 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -513,10 +513,12 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); + const char *prog_type_str; - if (prog_type < prog_type_name_size) + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + if (prog_type_str) jsonw_string_field(json_wtr, "owner_prog_type", - 
prog_type_name[prog_type]); + prog_type_str); else jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); @@ -597,10 +599,11 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); + const char *prog_type_str; - if (prog_type < prog_type_name_size) - printf("owner_prog_type %s ", - prog_type_name[prog_type]); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + if (prog_type_str) + printf("owner_prog_type %s ", prog_type_str); else printf("owner_prog_type %d ", prog_type); } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 5c2c63df92e8..39e1e7149f62 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -36,43 +36,6 @@ #define BPF_METADATA_PREFIX "bpf_metadata_" #define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) -const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", - [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", - 
[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", - [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", - [BPF_PROG_TYPE_TRACING] = "tracing", - [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_PROG_TYPE_EXT] = "ext", - [BPF_PROG_TYPE_LSM] = "lsm", - [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", - [BPF_PROG_TYPE_SYSCALL] = "syscall", -}; - -const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); - enum dump_mode { DUMP_JITED, DUMP_XLATED, @@ -428,12 +391,14 @@ out_free: static void print_prog_header_json(struct bpf_prog_info *info, int fd) { + const char *prog_type_str; char prog_name[MAX_PROG_FULL_NAME]; jsonw_uint_field(json_wtr, "id", info->id); - if (info->type < ARRAY_SIZE(prog_type_name)) - jsonw_string_field(json_wtr, "type", - prog_type_name[info->type]); + prog_type_str = libbpf_bpf_prog_type_str(info->type); + + if (prog_type_str) + jsonw_string_field(json_wtr, "type", prog_type_str); else jsonw_uint_field(json_wtr, "type", info->type); @@ -515,11 +480,13 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) static void print_prog_header_plain(struct bpf_prog_info *info, int fd) { + const char *prog_type_str; char prog_name[MAX_PROG_FULL_NAME]; printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(prog_type_name)) - printf("%s ", prog_type_name[info->type]); + prog_type_str = libbpf_bpf_prog_type_str(info->type); + if (prog_type_str) + printf("%s ", prog_type_str); else printf("type %u ", info->type); diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index c0e7acd698ed..1f0ff783f22d 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -333,9 +333,6 @@ class ProgFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'prog.c') - def get_prog_types(self): - return self.get_types_from_array('prog_type_name') - def get_attach_types(self): return 
self.get_types_from_array('attach_type_strings') @@ -533,16 +530,6 @@ def main(): verify(source_map_types, bashcomp_map_types, f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') - # Program types (enum) - - ref = bpf_info.get_prog_types() - - prog_info = ProgFileExtractor() - prog_types = set(prog_info.get_prog_types().keys()) - - verify(ref, prog_types, - f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') - # Attach types (enum) ref = bpf_info.get_attach_types() @@ -556,6 +543,7 @@ def main(): # Attach types (names) + prog_info = ProgFileExtractor() source_prog_attach_types = set(prog_info.get_attach_types().values()) help_prog_attach_types = prog_info.get_prog_attach_help() -- cgit v1.2.3-59-g8ed1b From c3a2574011a313707570d35b7e6e6536eda69dbb Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:21 +0000 Subject: selftests/bpf: Add test for libbpf_bpf_map_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a test for libbpf_bpf_map_type_str. The test retrieves all variants of the bpf_map_type enumeration using BTF and makes sure that the function under test works as expected for them. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220523230428.3077108-6-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/libbpf_str.c | 56 +++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index 42696aaebf3e..f5185a46ee00 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -14,11 +14,53 @@ static void uppercase(char *s) *s = toupper(*s); } +/* + * Test case to check that all bpf_map_type variants are covered by + * libbpf_bpf_map_type_str. + */ +static void test_libbpf_bpf_map_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_map_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_map_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_map_type map_type = (enum bpf_map_type)e->val; + const char *map_type_name; + const char *map_type_str; + char buf[256]; + + map_type_name = btf__str_by_offset(btf, e->name_off); + map_type_str = libbpf_bpf_map_type_str(map_type); + ASSERT_OK_PTR(map_type_str, map_type_name); + + snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + /* * Test case to check that all bpf_prog_type variants are covered by * libbpf_bpf_prog_type_str. 
*/ -void test_libbpf_bpf_prog_type_str(void) +static void test_libbpf_bpf_prog_type_str(void) { struct btf *btf; const struct btf_type *t; @@ -55,3 +97,15 @@ void test_libbpf_bpf_prog_type_str(void) cleanup: btf__free(btf); } + +/* + * Run all libbpf str conversion tests. + */ +void test_libbpf_str(void) +{ + if (test__start_subtest("bpf_map_type_str")) + test_libbpf_bpf_map_type_str(); + + if (test__start_subtest("bpf_prog_type_str")) + test_libbpf_bpf_prog_type_str(); +} -- cgit v1.2.3-59-g8ed1b From 2e98964bd6e283568730b1a4da3b1e4da3306a8e Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:22 +0000 Subject: bpftool: Use libbpf_bpf_map_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change switches bpftool over to using the recently introduced libbpf_bpf_map_type_str function instead of maintaining its own string representation for the bpf_map_type enum. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220523230428.3077108-7-deso@posteo.net --- tools/bpf/bpftool/feature.c | 30 ++++++---- tools/bpf/bpftool/main.h | 3 - tools/bpf/bpftool/map.c | 69 ++++++++-------------- .../selftests/bpf/test_bpftool_synctypes.py | 48 +++++++++------ 4 files changed, 71 insertions(+), 79 deletions(-) (limited to 'tools/testing') diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 02753f934ed3..cc9e4df8c58e 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -615,8 +615,8 @@ static bool probe_map_type_ifindex(enum bpf_map_type map_type, __u32 ifindex) } static void -probe_map_type(enum bpf_map_type map_type, const char *define_prefix, - __u32 ifindex) +probe_map_type(enum bpf_map_type map_type, char const *map_type_str, + const char *define_prefix, __u32 ifindex) { char feat_name[128], plain_desc[128], define_name[128]; const char *plain_comment = "eBPF map_type "; @@ -641,20 +641,16 
@@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, * check required for unprivileged users */ - if (!map_type_name[map_type]) { - p_info("map type name not found (type %d)", map_type); - return; - } maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1; - if (strlen(map_type_name[map_type]) > maxlen) { + if (strlen(map_type_str) > maxlen) { p_info("map type name too long"); return; } - sprintf(feat_name, "have_%s_map_type", map_type_name[map_type]); - sprintf(define_name, "%s_map_type", map_type_name[map_type]); + sprintf(feat_name, "have_%s_map_type", map_type_str); + sprintf(define_name, "%s_map_type", map_type_str); uppercase(define_name, sizeof(define_name)); - sprintf(plain_desc, "%s%s", plain_comment, map_type_name[map_type]); + sprintf(plain_desc, "%s%s", plain_comment, map_type_str); print_bool_feature(feat_name, plain_desc, define_name, res, define_prefix); } @@ -963,15 +959,23 @@ section_program_types(bool *supported_types, const char *define_prefix, static void section_map_types(const char *define_prefix, __u32 ifindex) { - unsigned int i; + unsigned int map_type = BPF_MAP_TYPE_UNSPEC; + const char *map_type_str; print_start_section("map_types", "Scanning eBPF map types...", "/*** eBPF map types ***/", define_prefix); - for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++) - probe_map_type(i, define_prefix, ifindex); + while (true) { + map_type++; + map_type_str = libbpf_bpf_map_type_str(map_type); + /* libbpf will return NULL for variants unknown to it. 
*/ + if (!map_type_str) + break; + + probe_map_type(map_type, map_type_str, define_prefix, ifindex); + } print_end_section(); } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 74204d0e33cf..e4fdaa0740b3 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -65,9 +65,6 @@ static inline void *u64_to_ptr(__u64 ptr) extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE]; -extern const char * const map_type_name[]; -extern const size_t map_type_name_size; - /* keep in sync with the definition in skeleton/pid_iter.bpf.c */ enum bpf_obj_type { BPF_OBJ_UNKNOWN, diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 70a1fd5253da..800834be1bcb 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -22,42 +22,6 @@ #include "json_writer.h" #include "main.h" -const char * const map_type_name[] = { - [BPF_MAP_TYPE_UNSPEC] = "unspec", - [BPF_MAP_TYPE_HASH] = "hash", - [BPF_MAP_TYPE_ARRAY] = "array", - [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", - [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", - [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", - [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", - [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", - [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", - [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", - [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", - [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", - [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", - [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", - [BPF_MAP_TYPE_DEVMAP] = "devmap", - [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", - [BPF_MAP_TYPE_SOCKMAP] = "sockmap", - [BPF_MAP_TYPE_CPUMAP] = "cpumap", - [BPF_MAP_TYPE_XSKMAP] = "xskmap", - [BPF_MAP_TYPE_SOCKHASH] = "sockhash", - [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", - [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", - [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", - [BPF_MAP_TYPE_QUEUE] = "queue", - [BPF_MAP_TYPE_STACK] = "stack", - 
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", - [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_MAP_TYPE_RINGBUF] = "ringbuf", - [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", - [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", - [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", -}; - -const size_t map_type_name_size = ARRAY_SIZE(map_type_name); - static struct hashmap *map_table; static bool map_is_per_cpu(__u32 type) @@ -81,12 +45,18 @@ static bool map_is_map_of_progs(__u32 type) static int map_type_from_str(const char *type) { + const char *map_type_str; unsigned int i; - for (i = 0; i < ARRAY_SIZE(map_type_name); i++) + for (i = 0; ; i++) { + map_type_str = libbpf_bpf_map_type_str(i); + if (!map_type_str) + break; + /* Don't allow prefixing in case of possible future shadowing */ - if (map_type_name[i] && !strcmp(map_type_name[i], type)) + if (!strcmp(map_type_str, type)) return i; + } return -1; } @@ -472,9 +442,12 @@ static int parse_elem(char **argv, struct bpf_map_info *info, static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) { + const char *map_type_str; + jsonw_uint_field(wtr, "id", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - jsonw_string_field(wtr, "type", map_type_name[info->type]); + map_type_str = libbpf_bpf_map_type_str(info->type); + if (map_type_str) + jsonw_string_field(wtr, "type", map_type_str); else jsonw_uint_field(wtr, "type", info->type); @@ -561,9 +534,13 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) static void show_map_header_plain(struct bpf_map_info *info) { + const char *map_type_str; + printf("%u: ", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - printf("%s ", map_type_name[info->type]); + + map_type_str = libbpf_bpf_map_type_str(info->type); + if (map_type_str) + printf("%s ", map_type_str); else printf("type %u ", info->type); @@ -879,9 +856,13 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, } if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && 
- info->value_size != 8) + info->value_size != 8) { + const char *map_type_str; + + map_type_str = libbpf_bpf_map_type_str(info->type); p_info("Warning: cannot read values from %s map with value_size != 8", - map_type_name[info->type]); + map_type_str); + } while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 1f0ff783f22d..0a08c074a8fe 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -186,6 +186,27 @@ class FileExtractor(object): parser.search_block(start_marker) return parser.parse(pattern, end_marker) + def make_enum_map(self, names, enum_prefix): + """ + Search for and parse an enum containing BPF_* members, just as get_enum + does. However, instead of just returning a set of the variant names, + also generate a textual representation from them by (assuming and) + removing a provided prefix and lowercasing the remainder. Then return a + dict mapping from name to textual representation. 
+ + @enum_values: a set of enum values; e.g., as retrieved by get_enum + @enum_prefix: the prefix to remove from each of the variants to infer + textual representation + """ + mapping = {} + for name in names: + if not name.startswith(enum_prefix): + raise Exception(f"enum variant {name} does not start with {enum_prefix}") + text = name[len(enum_prefix):].lower() + mapping[name] = text + + return mapping + def __get_description_list(self, start_marker, pattern, end_marker): parser = InlineListParser(self.reader) parser.search_block(start_marker) @@ -345,9 +366,6 @@ class MapFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'map.c') - def get_map_types(self): - return self.get_types_from_array('map_type_name') - def get_map_help(self): return self.get_help_list('TYPE') @@ -403,8 +421,9 @@ class BpfHeaderExtractor(FileExtractor): def get_prog_types(self): return self.get_enum('bpf_prog_type') - def get_map_types(self): - return self.get_enum('bpf_map_type') + def get_map_type_map(self): + names = self.get_enum('bpf_map_type') + return self.make_enum_map(names, 'BPF_MAP_TYPE_') def get_attach_types(self): return self.get_enum('bpf_attach_type') @@ -492,21 +511,12 @@ def main(): """) args = argParser.parse_args() - # Map types (enum) - bpf_info = BpfHeaderExtractor() - ref = bpf_info.get_map_types() - - map_info = MapFileExtractor() - source_map_items = map_info.get_map_types() - map_types_enum = set(source_map_items.keys()) - - verify(ref, map_types_enum, - f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') # Map types (names) - source_map_types = set(source_map_items.values()) + map_info = MapFileExtractor() + source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') help_map_types = map_info.get_map_help() @@ -522,13 +532,13 @@ def main(): bashcomp_map_types = bashcomp_info.get_map_types() verify(source_map_types, help_map_types, - f'Comparing 
{MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):') verify(source_map_types, man_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):') verify(help_map_options, man_map_options, f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') verify(source_map_types, bashcomp_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') # Attach types (enum) -- cgit v1.2.3-59-g8ed1b From 0b27b3d9fdf88c132b095a30ee2c61cd6e56e6cc Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:24 +0000 Subject: selftests/bpf: Add test for libbpf_bpf_attach_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a test for libbpf_bpf_attach_type_str. The test retrieves all variants of the bpf_attach_type enumeration using BTF and makes sure that the function under test works as expected for them. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220523230428.3077108-9-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/libbpf_str.c | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index f5185a46ee00..c88df92868c1 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -14,6 +14,51 @@ static void uppercase(char *s) *s = toupper(*s); } +/* + * Test case to check that all bpf_attach_type variants are covered by + * libbpf_bpf_attach_type_str. + */ +static void test_libbpf_bpf_attach_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_attach_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_attach_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val; + const char *attach_type_name; + const char *attach_type_str; + char buf[256]; + + if (attach_type == __MAX_BPF_ATTACH_TYPE) + continue; + + attach_type_name = btf__str_by_offset(btf, e->name_off); + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + ASSERT_OK_PTR(attach_type_str, attach_type_name); + + snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, attach_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + /* * Test case to check that all bpf_map_type variants are covered by * libbpf_bpf_map_type_str. 
@@ -103,6 +148,9 @@ cleanup: */ void test_libbpf_str(void) { + if (test__start_subtest("bpf_attach_type_str")) + test_libbpf_bpf_attach_type_str(); + if (test__start_subtest("bpf_map_type_str")) test_libbpf_bpf_map_type_str(); -- cgit v1.2.3-59-g8ed1b From 1ba5ad36e00f46e3f7676f5de6b87f5a2f57f1f1 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:25 +0000 Subject: bpftool: Use libbpf_bpf_attach_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change switches bpftool over to using the recently introduced libbpf_bpf_attach_type_str function instead of maintaining its own string representation for the bpf_attach_type enum. Note that, contrary to other enum types, the variant names that bpftool maps bpf_attach_type to do not adhere to a simple-to-follow rule. With bpf_prog_type, for example, the textual representation can easily be inferred by stripping the BPF_PROG_TYPE_ prefix and lowercasing the remaining string. bpf_attach_type violates this rule for various variants. We decided to fix up this deficiency with this change, meaning that bpftool uses the same textual representations as libbpf. Supporting tests, completion scripts, and man pages have been adjusted accordingly. However, we did add support for accepting (the now undocumented) original attach type names when they are provided by users. For the test (test_bpftool_synctypes.py), I have removed the enum representation checks, because we no longer mirror the various enum variant names in bpftool source code. For the man page, help text, and completion script checks we are now using enum definitions from uapi/linux/bpf.h as the source of truth directly. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220523230428.3077108-10-deso@posteo.net --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 16 +++- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 5 +- tools/bpf/bpftool/bash-completion/bpftool | 18 ++-- tools/bpf/bpftool/cgroup.c | 53 +++++++---- tools/bpf/bpftool/common.c | 82 +++++++--------- tools/bpf/bpftool/link.c | 15 ++- tools/bpf/bpftool/main.h | 14 +++ tools/bpf/bpftool/prog.c | 25 ++++- .../selftests/bpf/test_bpftool_synctypes.py | 104 ++++++++------------- 9 files changed, 182 insertions(+), 150 deletions(-) (limited to 'tools/testing') diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index a17e9aa314fd..bd015ec9847b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -31,11 +31,17 @@ CGROUP COMMANDS | **bpftool** **cgroup help** | | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } -| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | -| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | -| **sock_release** } +| *ATTACH_TYPE* := { **cgroup_inet_ingress** | **cgroup_inet_egress** | +| **cgroup_inet_sock_create** | **cgroup_sock_ops** | +| **cgroup_device** | **cgroup_inet4_bind** | **cgroup_inet6_bind** | +| **cgroup_inet4_post_bind** | **cgroup_inet6_post_bind** | +| **cgroup_inet4_connect** | **cgroup_inet6_connect** | +| **cgroup_inet4_getpeername** | **cgroup_inet6_getpeername** | +| **cgroup_inet4_getsockname** | **cgroup_inet6_getsockname** | +| **cgroup_udp4_sendmsg** | **cgroup_udp6_sendmsg** | +| 
**cgroup_udp4_recvmsg** | **cgroup_udp6_recvmsg** | +| **cgroup_sysctl** | **cgroup_getsockopt** | **cgroup_setsockopt** | +| **cgroup_inet_sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index a2e9359e554c..eb1b2a254eb1 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -53,8 +53,9 @@ PROG COMMANDS | **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } -| *ATTACH_TYPE* := { -| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** +| *ATTACH_TYPE* := { +| **sk_msg_verdict** | **sk_skb_verdict** | **sk_skb_stream_verdict** | +| **sk_skb_stream_parser** | **flow_dissector** | } | *METRICs* := { | **cycles** | **instructions** | **l1d_loads** | **llc_misses** | diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 5df8d72c5179..91f89a9a5b36 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -407,8 +407,8 @@ _bpftool() return 0 ;; 5) - local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \ - skb_verdict stream_verdict stream_parser \ + local BPFTOOL_PROG_ATTACH_TYPES='sk_msg_verdict \ + sk_skb_verdict sk_skb_stream_verdict sk_skb_stream_parser \ flow_dissector' COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) ) return 0 @@ -1039,12 +1039,14 @@ _bpftool() return 0 ;; attach|detach) - local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \ - sock_create sock_ops device \ - bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ - getpeername4 getpeername6 getsockname4 getsockname6 \ - sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt sock_release' + local 
BPFTOOL_CGROUP_ATTACH_TYPES='cgroup_inet_ingress cgroup_inet_egress \ + cgroup_inet_sock_create cgroup_sock_ops cgroup_device cgroup_inet4_bind \ + cgroup_inet6_bind cgroup_inet4_post_bind cgroup_inet6_post_bind \ + cgroup_inet4_connect cgroup_inet6_connect cgroup_inet4_getpeername \ + cgroup_inet6_getpeername cgroup_inet4_getsockname cgroup_inet6_getsockname \ + cgroup_udp4_sendmsg cgroup_udp6_sendmsg cgroup_udp4_recvmsg \ + cgroup_udp6_recvmsg cgroup_sysctl cgroup_getsockopt cgroup_setsockopt \ + cgroup_inet_sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' # Check for $prev = $command first diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index effe136119d7..42421fe47a58 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -21,25 +21,43 @@ #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" -#define HELP_SPEC_ATTACH_TYPES \ - " ATTACH_TYPE := { ingress | egress | sock_create |\n" \ - " sock_ops | device | bind4 | bind6 |\n" \ - " post_bind4 | post_bind6 | connect4 |\n" \ - " connect6 | getpeername4 | getpeername6 |\n" \ - " getsockname4 | getsockname6 | sendmsg4 |\n" \ - " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt |\n" \ - " sock_release }" +#define HELP_SPEC_ATTACH_TYPES \ + " ATTACH_TYPE := { cgroup_inet_ingress | cgroup_inet_egress |\n" \ + " cgroup_inet_sock_create | cgroup_sock_ops |\n" \ + " cgroup_device | cgroup_inet4_bind |\n" \ + " cgroup_inet6_bind | cgroup_inet4_post_bind |\n" \ + " cgroup_inet6_post_bind | cgroup_inet4_connect |\n" \ + " cgroup_inet6_connect | cgroup_inet4_getpeername |\n" \ + " cgroup_inet6_getpeername | cgroup_inet4_getsockname |\n" \ + " cgroup_inet6_getsockname | cgroup_udp4_sendmsg |\n" \ + " cgroup_udp6_sendmsg | cgroup_udp4_recvmsg |\n" \ + " cgroup_udp6_recvmsg | cgroup_sysctl |\n" \ + " cgroup_getsockopt | cgroup_setsockopt |\n" \ + " cgroup_inet_sock_release }" static unsigned int 
query_flags; static enum bpf_attach_type parse_attach_type(const char *str) { + const char *attach_type_str; enum bpf_attach_type type; - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - if (attach_type_name[type] && - is_prefix(str, attach_type_name[type])) + for (type = 0; ; type++) { + attach_type_str = libbpf_bpf_attach_type_str(type); + if (!attach_type_str) + break; + if (!strcmp(str, attach_type_str)) + return type; + } + + /* Also check traditionally used attach type strings. For these we keep + * allowing prefixed usage. + */ + for (type = 0; ; type++) { + attach_type_str = bpf_attach_type_input_str(type); + if (!attach_type_str) + break; + if (is_prefix(str, attach_type_str)) return type; } @@ -52,6 +70,7 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, { char prog_name[MAX_PROG_FULL_NAME]; struct bpf_prog_info info = {}; + const char *attach_type_str; __u32 info_len = sizeof(info); int prog_fd; @@ -64,13 +83,13 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, return -1; } + attach_type_str = libbpf_bpf_attach_type_str(attach_type); get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name)); if (json_output) { jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info.id); - if (attach_type < ARRAY_SIZE(attach_type_name)) - jsonw_string_field(json_wtr, "attach_type", - attach_type_name[attach_type]); + if (attach_type_str) + jsonw_string_field(json_wtr, "attach_type", attach_type_str); else jsonw_uint_field(json_wtr, "attach_type", attach_type); jsonw_string_field(json_wtr, "attach_flags", @@ -79,8 +98,8 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type, jsonw_end_object(json_wtr); } else { printf("%s%-8u ", level ? 
" " : "", info.id); - if (attach_type < ARRAY_SIZE(attach_type_name)) - printf("%-15s", attach_type_name[attach_type]); + if (attach_type_str) + printf("%-15s", attach_type_str); else printf("type %-10u", attach_type); printf(" %-15s %-15s\n", attach_flags_str, prog_name); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c740142c24d8..a45b42ee8ab0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -31,52 +31,6 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { - [BPF_CGROUP_INET_INGRESS] = "ingress", - [BPF_CGROUP_INET_EGRESS] = "egress", - [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", - [BPF_CGROUP_INET_SOCK_RELEASE] = "sock_release", - [BPF_CGROUP_SOCK_OPS] = "sock_ops", - [BPF_CGROUP_DEVICE] = "device", - [BPF_CGROUP_INET4_BIND] = "bind4", - [BPF_CGROUP_INET6_BIND] = "bind6", - [BPF_CGROUP_INET4_CONNECT] = "connect4", - [BPF_CGROUP_INET6_CONNECT] = "connect6", - [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", - [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", - [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", - [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", - [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", - [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", - [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", - [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", - [BPF_CGROUP_SYSCTL] = "sysctl", - [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", - [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", - [BPF_CGROUP_GETSOCKOPT] = "getsockopt", - [BPF_CGROUP_SETSOCKOPT] = "setsockopt", - [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", - [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", - [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", - [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", - [BPF_LIRC_MODE2] = "lirc_mode2", - [BPF_FLOW_DISSECTOR] = "flow_dissector", - [BPF_TRACE_RAW_TP] = "raw_tp", - [BPF_TRACE_FENTRY] = "fentry", - [BPF_TRACE_FEXIT] = "fexit", - [BPF_MODIFY_RETURN] = "mod_ret", - [BPF_LSM_MAC] = 
"lsm_mac", - [BPF_SK_LOOKUP] = "sk_lookup", - [BPF_TRACE_ITER] = "trace_iter", - [BPF_XDP_DEVMAP] = "xdp_devmap", - [BPF_XDP_CPUMAP] = "xdp_cpumap", - [BPF_XDP] = "xdp", - [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", - [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", - [BPF_PERF_EVENT] = "perf_event", - [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi", -}; - void p_err(const char *fmt, ...) { va_list ap; @@ -1009,3 +963,39 @@ bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx) { return k1 == k2; } + +const char *bpf_attach_type_input_str(enum bpf_attach_type t) +{ + switch (t) { + case BPF_CGROUP_INET_INGRESS: return "ingress"; + case BPF_CGROUP_INET_EGRESS: return "egress"; + case BPF_CGROUP_INET_SOCK_CREATE: return "sock_create"; + case BPF_CGROUP_INET_SOCK_RELEASE: return "sock_release"; + case BPF_CGROUP_SOCK_OPS: return "sock_ops"; + case BPF_CGROUP_DEVICE: return "device"; + case BPF_CGROUP_INET4_BIND: return "bind4"; + case BPF_CGROUP_INET6_BIND: return "bind6"; + case BPF_CGROUP_INET4_CONNECT: return "connect4"; + case BPF_CGROUP_INET6_CONNECT: return "connect6"; + case BPF_CGROUP_INET4_POST_BIND: return "post_bind4"; + case BPF_CGROUP_INET6_POST_BIND: return "post_bind6"; + case BPF_CGROUP_INET4_GETPEERNAME: return "getpeername4"; + case BPF_CGROUP_INET6_GETPEERNAME: return "getpeername6"; + case BPF_CGROUP_INET4_GETSOCKNAME: return "getsockname4"; + case BPF_CGROUP_INET6_GETSOCKNAME: return "getsockname6"; + case BPF_CGROUP_UDP4_SENDMSG: return "sendmsg4"; + case BPF_CGROUP_UDP6_SENDMSG: return "sendmsg6"; + case BPF_CGROUP_SYSCTL: return "sysctl"; + case BPF_CGROUP_UDP4_RECVMSG: return "recvmsg4"; + case BPF_CGROUP_UDP6_RECVMSG: return "recvmsg6"; + case BPF_CGROUP_GETSOCKOPT: return "getsockopt"; + case BPF_CGROUP_SETSOCKOPT: return "setsockopt"; + case BPF_TRACE_RAW_TP: return "raw_tp"; + case BPF_TRACE_FENTRY: return "fentry"; + case BPF_TRACE_FEXIT: return "fexit"; + case BPF_MODIFY_RETURN: 
return "mod_ret"; + case BPF_SK_REUSEPORT_SELECT: return "sk_skb_reuseport_select"; + case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE: return "sk_skb_reuseport_select_or_migrate"; + default: return libbpf_bpf_attach_type_str(t); + } +} diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index e27108489604..66a25450b598 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -78,9 +78,11 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr) static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr) { - if (attach_type < ARRAY_SIZE(attach_type_name)) - jsonw_string_field(wtr, "attach_type", - attach_type_name[attach_type]); + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + if (attach_type_str) + jsonw_string_field(wtr, "attach_type", attach_type_str); else jsonw_uint_field(wtr, "attach_type", attach_type); } @@ -196,8 +198,11 @@ static void show_link_header_plain(struct bpf_link_info *info) static void show_link_attach_type_plain(__u32 attach_type) { - if (attach_type < ARRAY_SIZE(attach_type_name)) - printf("attach_type %s ", attach_type_name[attach_type]); + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + if (attach_type_str) + printf("attach_type %s ", attach_type_str); else printf("attach_type %u ", attach_type); } diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index e4fdaa0740b3..6c311f47147e 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -243,6 +243,20 @@ int print_all_levels(__maybe_unused enum libbpf_print_level level, size_t hash_fn_for_key_as_id(const void *key, void *ctx); bool equal_fn_for_key_as_id(const void *k1, const void *k2, void *ctx); +/* bpf_attach_type_input_str - convert the provided attach type value into a + * textual representation that we accept for input purposes. 
+ * + * This function is similar in nature to libbpf_bpf_attach_type_str, but + * recognizes some attach type names that have been used by the program in the + * past and which do not follow the string inference scheme that libbpf uses. + * These textual representations should only be used for user input. + * + * @t: The attach type + * Returns a pointer to a static string identifying the attach type. NULL is + * returned for unknown bpf_attach_type values. + */ +const char *bpf_attach_type_input_str(enum bpf_attach_type t); + static inline void *u32_as_hash_field(__u32 x) { return (void *)(uintptr_t)x; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 39e1e7149f62..e71f0b2da50b 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -41,12 +41,23 @@ enum dump_mode { DUMP_XLATED, }; +static const bool attach_types[] = { + [BPF_SK_SKB_STREAM_PARSER] = true, + [BPF_SK_SKB_STREAM_VERDICT] = true, + [BPF_SK_SKB_VERDICT] = true, + [BPF_SK_MSG_VERDICT] = true, + [BPF_FLOW_DISSECTOR] = true, + [__MAX_BPF_ATTACH_TYPE] = false, +}; + +/* Textual representations traditionally used by the program and kept around + * for the sake of backwards compatibility. 
+ */ static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", [BPF_SK_SKB_VERDICT] = "skb_verdict", [BPF_SK_MSG_VERDICT] = "msg_verdict", - [BPF_FLOW_DISSECTOR] = "flow_dissector", [__MAX_BPF_ATTACH_TYPE] = NULL, }; @@ -57,6 +68,14 @@ static enum bpf_attach_type parse_attach_type(const char *str) enum bpf_attach_type type; for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + if (attach_types[type]) { + const char *attach_type_str; + + attach_type_str = libbpf_bpf_attach_type_str(type); + if (!strcmp(str, attach_type_str)) + return type; + } + if (attach_type_strings[type] && is_prefix(str, attach_type_strings[type])) return type; @@ -2341,8 +2360,8 @@ static int do_help(int argc, char **argv) " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" - " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n" - " stream_parser | flow_dissector }\n" + " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" + " sk_skb_stream_parser | flow_dissector }\n" " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index 0a08c074a8fe..e443e6542cb9 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -58,7 +58,7 @@ class BlockParser(object): class ArrayParser(BlockParser): """ - A parser for extracting dicionaries of values from some BPF-related arrays. + A parser for extracting a set of values from some BPF-related arrays. 
@reader: a pointer to the open file to parse @array_name: name of the array to parse """ @@ -66,7 +66,7 @@ class ArrayParser(BlockParser): def __init__(self, reader, array_name): self.array_name = array_name - self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') + self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') super().__init__(reader) def search_block(self): @@ -80,15 +80,15 @@ class ArrayParser(BlockParser): Parse a block and return data as a dictionary. Items to extract must be on separate lines in the file. """ - pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') - entries = {} + pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') + entries = set() while True: line = self.reader.readline() if line == '' or re.match(self.end_marker, line): break capture = pattern.search(line) if capture: - entries[capture.group(1)] = capture.group(2) + entries |= {capture.group(1)} return entries class InlineListParser(BlockParser): @@ -115,7 +115,7 @@ class InlineListParser(BlockParser): class FileExtractor(object): """ A generic reader for extracting data from a given file. This class contains - several helper methods that wrap arround parser objects to extract values + several helper methods that wrap around parser objects to extract values from different structures. This class does not offer a way to set a filename, which is expected to be defined in children classes. 
@@ -139,21 +139,19 @@ class FileExtractor(object): def get_types_from_array(self, array_name): """ - Search for and parse an array associating names to BPF_* enum members, - for example: + Search for and parse a list of allowed BPF_* enum members, for example: - const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", + const bool prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = true, + [BPF_PROG_TYPE_SOCKET_FILTER] = true, + [BPF_PROG_TYPE_KPROBE] = true, }; - Return a dictionary with the enum member names as keys and the - associated names as values, for example: + Return a set of the enum members, for example: - {'BPF_PROG_TYPE_UNSPEC': 'unspec', - 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', - 'BPF_PROG_TYPE_KPROBE': 'kprobe'} + {'BPF_PROG_TYPE_UNSPEC', + 'BPF_PROG_TYPE_SOCKET_FILTER', + 'BPF_PROG_TYPE_KPROBE'} @array_name: name of the array to parse """ @@ -355,7 +353,8 @@ class ProgFileExtractor(SourceFileExtractor): filename = os.path.join(BPFTOOL_DIR, 'prog.c') def get_attach_types(self): - return self.get_types_from_array('attach_type_strings') + types = self.get_types_from_array('attach_types') + return self.make_enum_map(types, 'BPF_') def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') @@ -378,30 +377,6 @@ class CgroupFileExtractor(SourceFileExtractor): def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') -class CommonFileExtractor(SourceFileExtractor): - """ - An extractor for bpftool's common.c. 
- """ - filename = os.path.join(BPFTOOL_DIR, 'common.c') - - def __init__(self): - super().__init__() - self.attach_types = {} - - def get_attach_types(self): - if not self.attach_types: - self.attach_types = self.get_types_from_array('attach_type_name') - return self.attach_types - - def get_cgroup_attach_types(self): - if not self.attach_types: - self.get_attach_types() - cgroup_types = {} - for (key, value) in self.attach_types.items(): - if key.find('BPF_CGROUP') != -1: - cgroup_types[key] = value - return cgroup_types - class GenericSourceExtractor(SourceFileExtractor): """ An extractor for generic source code files. @@ -418,6 +393,10 @@ class BpfHeaderExtractor(FileExtractor): """ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') + def __init__(self): + super().__init__() + self.attach_types = {} + def get_prog_types(self): return self.get_enum('bpf_prog_type') @@ -425,8 +404,17 @@ class BpfHeaderExtractor(FileExtractor): names = self.get_enum('bpf_map_type') return self.make_enum_map(names, 'BPF_MAP_TYPE_') - def get_attach_types(self): - return self.get_enum('bpf_attach_type') + def get_attach_type_map(self): + if not self.attach_types: + names = self.get_enum('bpf_attach_type') + self.attach_types = self.make_enum_map(names, 'BPF_') + return self.attach_types + + def get_cgroup_attach_type_map(self): + if not self.attach_types: + self.get_attach_type_map() + return {name: text for name, text in self.attach_types.items() + if name.startswith('BPF_CGROUP')} class ManPageExtractor(FileExtractor): """ @@ -540,17 +528,6 @@ def main(): verify(source_map_types, bashcomp_map_types, f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') - # Attach types (enum) - - ref = bpf_info.get_attach_types() - bpf_info.close() - - common_info = CommonFileExtractor() - attach_types = common_info.get_attach_types() - - verify(ref, attach_types, - f'Comparing BPF header (enum bpf_attach_type) and 
{CommonFileExtractor.filename} (attach_type_name):') - # Attach types (names) prog_info = ProgFileExtractor() @@ -569,18 +546,17 @@ def main(): bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() verify(source_prog_attach_types, help_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_prog_attach_types, man_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):') verify(help_prog_options, man_prog_options, f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') verify(source_prog_attach_types, bashcomp_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') # Cgroup attach types - - source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) - common_info.close() + source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values()) + bpf_info.close() cgroup_info = CgroupFileExtractor() help_cgroup_attach_types = cgroup_info.get_prog_attach_help() @@ -596,13 +572,13 @@ def main(): bashcomp_info.close() verify(source_cgroup_attach_types, help_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_cgroup_attach_types, 
man_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') verify(help_cgroup_options, man_cgroup_options, f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') # Options for remaining commands -- cgit v1.2.3-59-g8ed1b From dea73da2213a722d952840edd598f2f3a40ddc68 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Mon, 23 May 2022 23:04:27 +0000 Subject: selftests/bpf: Add test for libbpf_bpf_link_type_str MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a test for libbpf_bpf_link_type_str. The test retrieves all variants of the bpf_link_type enumeration using BTF and makes sure that the function under test works as expected for them. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Quentin Monnet Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220523230428.3077108-12-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/libbpf_str.c | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index c88df92868c1..93e9cddaadcf 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -59,6 +59,51 @@ cleanup: btf__free(btf); } +/* + * Test case to check that all bpf_link_type variants are covered by + * libbpf_bpf_link_type_str. + */ +static void test_libbpf_bpf_link_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_link_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_link_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_link_type link_type = (enum bpf_link_type)e->val; + const char *link_type_name; + const char *link_type_str; + char buf[256]; + + if (link_type == MAX_BPF_LINK_TYPE) + continue; + + link_type_name = btf__str_by_offset(btf, e->name_off); + link_type_str = libbpf_bpf_link_type_str(link_type); + ASSERT_OK_PTR(link_type_str, link_type_name); + + snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, link_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + /* * Test case to check that all bpf_map_type variants are covered by * libbpf_bpf_map_type_str. 
@@ -151,6 +196,9 @@ void test_libbpf_str(void) if (test__start_subtest("bpf_attach_type_str")) test_libbpf_bpf_attach_type_str(); + if (test__start_subtest("bpf_link_type_str")) + test_libbpf_bpf_link_type_str(); + if (test__start_subtest("bpf_map_type_str")) test_libbpf_bpf_map_type_str(); -- cgit v1.2.3-59-g8ed1b From eb7b36ce47f830a01ad9405e673b563cc3638d5d Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Sat, 21 May 2022 23:13:29 +0800 Subject: selftests/bpf: Fix test_run logic in fexit_stress.c In the commit da00d2f117a0 ("bpf: Add test ops for BPF_PROG_TYPE_TRACING"), the bpf_fentry_test1 function was moved into bpf_prog_test_run_tracing(), which is the test_run function of the tracing BPF programs. Thus calling 'bpf_prog_test_run_opts(filter_fd, &topts)' will not trigger bpf_fentry_test1 function as filter_fd is a sk_filter BPF program. Fix it by replacing filter_fd with fexit_fd in the bpf_prog_test_run_opts() function. Fixes: da00d2f117a0 ("bpf: Add test ops for BPF_PROG_TYPE_TRACING") Signed-off-by: Yuntao Wang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220521151329.648013-1-ytcoode@gmail.com --- .../selftests/bpf/prog_tests/fexit_stress.c | 32 +++------------------- 1 file changed, 4 insertions(+), 28 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index a7e74297f15f..5a7e6011f6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -7,11 +7,9 @@ void serial_test_fexit_stress(void) { - char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - char error[4096]; - int err, i, filter_fd; + int err, i; const struct bpf_insn trace_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -20,25 +18,9 @@ void serial_test_fexit_stress(void) LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .expected_attach_type = BPF_TRACE_FEXIT, - .log_buf = error, 
- .log_size = sizeof(error), ); - const struct bpf_insn skb_program[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - - LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, - .log_buf = error, - .log_size = sizeof(error), - ); - - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = test_skb, - .data_size_in = sizeof(test_skb), - .repeat = 1, - ); + LIBBPF_OPTS(bpf_test_run_opts, topts); err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); @@ -58,15 +40,9 @@ void serial_test_fexit_stress(void) goto out; } - filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), - &skb_opts); - if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) - goto out; + err = bpf_prog_test_run_opts(fexit_fd[0], &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); - err = bpf_prog_test_run_opts(filter_fd, &topts); - close(filter_fd); - CHECK_FAIL(err); out: for (i = 0; i < CNT; i++) { if (link_fd[i]) -- cgit v1.2.3-59-g8ed1b From e6ff92f41b65fce07365f1066fb13b5e42aca08d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 1 Jun 2022 16:40:50 -0700 Subject: selftests/bpf: Fix tc_redirect_dtime tc_redirect_dtime was reported flaky from time to time. It always fails at the udp test and complains that bpf@tc-ingress got a skb->tstamp when handling a udp packet. It is unexpected because the skb->tstamp should have been cleared when crossing different netns. The most likely cause is that the skb is actually a tcp packet from the earlier tcp test. It could be the final TCP_FIN handling. This patch tightens the skb->tstamp check in the bpf prog. It ensures the skb is the current testing traffic. First, it checks that skb matches the IPPROTO of the running test (i.e. tcp vs udp). Second, it checks the server port (dst_ns_port). The server port is unique for each test (50000 + test_enum).
Also fixed a typo in test_udp_dtime(): s/P100/P101/ Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh") Reported-by: Andrii Nakryiko Signed-off-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220601234050.2572671-1-kafai@fb.com --- .../testing/selftests/bpf/prog_tests/tc_redirect.c | 8 ++-- tools/testing/selftests/bpf/progs/test_tc_dtime.c | 53 +++++++++++++++++++++- 2 files changed, 55 insertions(+), 6 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 958dae769c52..cb6a53b3e023 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel) __u32 *errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0); + test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); @@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_STREAM, addr, 0); + test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); /* fwdns_prio100 prog does not read delivery_time_type, so * kernel puts the (rcv) timetamp in __sk_buff->tstamp @@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_DGRAM, addr, 0); + test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); /* non mono delivery time is not forwarded */ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P100)); + dtime_cnt_str(t, 
INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 06f300d06dbd..b596479a9ebe 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -115,6 +117,19 @@ static bool bpf_fwd(void) return test < TCP_IP4_RT_FWD; } +static __u8 get_proto(void) +{ + switch (test) { + case UDP_IP4: + case UDP_IP6: + case UDP_IP4_RT_FWD: + case UDP_IP6_RT_FWD: + return IPPROTO_UDP; + default: + return IPPROTO_TCP; + } +} + /* -1: parse error: TC_ACT_SHOT * 0: not testing traffic: TC_ACT_OK * >0: first byte is the inet_proto, second byte has the netns @@ -122,11 +137,16 @@ static bool bpf_fwd(void) */ static int skb_get_type(struct __sk_buff *skb) { + __u16 dst_ns_port = __bpf_htons(50000 + test); void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); __u8 inet_proto = 0, ns = 0; struct ipv6hdr *ip6h; + __u16 sport, dport; struct iphdr *iph; + struct tcphdr *th; + struct udphdr *uh; + void *trans; switch (skb->protocol) { case __bpf_htons(ETH_P_IP): @@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb) else if (iph->saddr == ip4_dst) ns = DST_NS; inet_proto = iph->protocol; + trans = iph + 1; break; case __bpf_htons(ETH_P_IPV6): ip6h = data + sizeof(struct ethhdr); @@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb) else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) ns = DST_NS; inet_proto = ip6h->nexthdr; + trans = ip6h + 1; break; default: return 0; } - if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) + /* skb is not from src_ns or dst_ns. + * skb is not the testing IPPROTO. 
+ */ + if (!ns || inet_proto != get_proto()) return 0; - return (ns << 8 | inet_proto); + switch (inet_proto) { + case IPPROTO_TCP: + th = trans; + if (th + 1 > data_end) + return -1; + sport = th->source; + dport = th->dest; + break; + case IPPROTO_UDP: + uh = trans; + if (uh + 1 > data_end) + return -1; + sport = uh->source; + dport = uh->dest; + break; + default: + return 0; + } + + /* The skb is the testing traffic */ + if ((ns == SRC_NS && dport == dst_ns_port) || + (ns == DST_NS && sport == dst_ns_port)) + return (ns << 8 | inet_proto); + + return 0; } /* format: direction@iface@netns -- cgit v1.2.3-59-g8ed1b From 02f4afebf8a54ba16f99f4f6ca10df3efeac6229 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 2 Jun 2022 11:25:07 +0800 Subject: selftests/bpf: Add drv mode testing for xdping As subject, we only test SKB mode for xdping at present. Now add DRV mode for xdping. Signed-off-by: Hangbin Liu Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220602032507.464453-1-liuhangbin@gmail.com --- tools/testing/selftests/bpf/test_xdping.sh | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh index c2f0ddb45531..c3d82e0a7378 100755 --- a/tools/testing/selftests/bpf/test_xdping.sh +++ b/tools/testing/selftests/bpf/test_xdping.sh @@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do test "$client_args" "$server_args" done +# Test drv mode +test "-I veth1 -N" "-I veth0 -s -N" +test "-I veth1 -N -c 10" "-I veth0 -s -N" + echo "OK. All tests passed" exit 0 -- cgit v1.2.3-59-g8ed1b From d932815a4394b6e8e861f75600666db40c706b8b Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:26:57 -0700 Subject: selftests/bpf: Fix selftests failure The kflag is supported now for BTF_KIND_ENUM. So remove the test which tests verifier failure due to existence of kflag. 
Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062657.3723737-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ba5bde53d418..8e068e06b3e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -2896,26 +2896,6 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid btf_info kind_flag", }, -{ - .descr = "invalid enum kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - { .descr = "valid fwd kind_flag", .raw_types = { -- cgit v1.2.3-59-g8ed1b From 2b7301457ffe7be085744fa49f1244a71c1c6f1d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:27:03 -0700 Subject: selftests/bpf: Test new enum kflag and enum64 API functions Add tests to use the new enum kflag and enum64 API functions in selftest btf_write. 
Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062703.3724287-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/btf_helpers.c | 25 +++- tools/testing/selftests/bpf/prog_tests/btf_write.c | 126 +++++++++++++++------ 2 files changed, 114 insertions(+), 37 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index b5941d514e17..1c1c2c26690a 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -26,11 +26,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TYPE_TAG) + if (kind > BTF_KIND_ENUM64) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -139,14 +140,32 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) } case BTF_KIND_ENUM: { const struct btf_enum *v = btf_enum(t); + const char *fmt_str; - fprintf(out, " size=%u vlen=%u", t->size, vlen); + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u"; + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); for (i = 0; i < vlen; i++, v++) { - fprintf(out, "\n\t'%s' val=%u", + fprintf(out, fmt_str, btf_str(btf, v->name_off), v->val); } break; } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *v = btf_enum64(t); + const char *fmt_str; + + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu"; + + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, fmt_str, + btf_str(btf, v->name_off), + ((__u64)v->val_hi32 << 32) | v->val_lo32); + } + break; + } case BTF_KIND_FWD: fprintf(out, " fwd_kind=%s", btf_kflag(t) ? 
"union" : "struct"); break; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index addf99c05896..6e36de1302fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -9,6 +9,7 @@ static void gen_btf(struct btf *btf) const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; + const struct btf_enum64 *v64; const struct btf_enum *v; const struct btf_param *p; int id, err, str_off; @@ -171,7 +172,7 @@ static void gen_btf(struct btf *btf) ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); ASSERT_EQ(v->val, 2, "v2_val"); ASSERT_STREQ(btf_type_raw_dump(btf, 9), - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "raw_dump"); @@ -202,7 +203,7 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); ASSERT_EQ(t->size, 4, "enum_fwd_sz"); ASSERT_STREQ(btf_type_raw_dump(btf, 12), - "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump"); /* TYPEDEF */ id = btf__add_typedef(btf, "typedef1", 1); @@ -307,6 +308,48 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(t->type, 1, "tag_type"); ASSERT_STREQ(btf_type_raw_dump(btf, 20), "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); + + /* ENUM64 */ + id = btf__add_enum64(btf, "e1", 8, true); + ASSERT_EQ(id, 21, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", -1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */ + ASSERT_OK(err, "v2_res"); + t = btf__type_by_id(btf, 21); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + 
ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + v64 = btf_enum64(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name"); + ASSERT_EQ(v64->val_hi32, 0x1, "v2_val"); + ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 21), + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", "raw_dump"); + + id = btf__add_enum64(btf, "e1", 8, false); + ASSERT_EQ(id, 22, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */ + ASSERT_OK(err, "v1_res"); + t = btf__type_by_id(btf, 22); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 22), + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", "raw_dump"); } static void test_btf_add() @@ -332,12 +375,12 @@ static void test_btf_add() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -348,7 +391,12 @@ static void 
test_btf_add() "\ttype_id=1 offset=4 size=8", "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", - "[20] TYPE_TAG 'tag1' type_id=1"); + "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); btf__free(btf); } @@ -370,7 +418,7 @@ static void test_btf_add_btf() gen_btf(btf2); id = btf__add_btf(btf1, btf2); - if (!ASSERT_EQ(id, 21, "id")) + if (!ASSERT_EQ(id, 23, "id")) goto cleanup; VALIDATE_RAW_BTF( @@ -386,12 +434,12 @@ static void test_btf_add_btf() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -403,36 +451,46 @@ static void test_btf_add_btf() "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", /* types appended from the second BTF */ - "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[22] PTR '(anon)' type_id=21", - "[23] CONST '(anon)' type_id=25", - "[24] VOLATILE '(anon)' type_id=23", - "[25] RESTRICT '(anon)' type_id=24", - "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", - "[27] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' 
type_id=21 bits_offset=0\n" - "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", - "[28] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", - "[29] ENUM 'e1' size=4 vlen=2\n" + "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[24] PTR '(anon)' type_id=23", + "[25] CONST '(anon)' type_id=27", + "[26] VOLATILE '(anon)' type_id=25", + "[27] RESTRICT '(anon)' type_id=26", + "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10", + "[29] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=23 bits_offset=0\n" + "\t'f2' type_id=23 bits_offset=32 bitfield_size=16", + "[30] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=23 bits_offset=0 bitfield_size=16", + "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", - "[30] FWD 'struct_fwd' fwd_kind=struct", - "[31] FWD 'union_fwd' fwd_kind=union", - "[32] ENUM 'enum_fwd' size=4 vlen=0", - "[33] TYPEDEF 'typedef1' type_id=21", - "[34] FUNC 'func1' type_id=35 linkage=global", - "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" - "\t'p1' type_id=21\n" - "\t'p2' type_id=22", - "[36] VAR 'var1' type_id=21, linkage=global-alloc", - "[37] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=21 offset=4 size=8", - "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", - "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", - "[40] TYPE_TAG 'tag1' type_id=21"); + "[32] FWD 'struct_fwd' fwd_kind=struct", + "[33] FWD 'union_fwd' fwd_kind=union", + "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", + "[35] TYPEDEF 'typedef1' type_id=23", + "[36] FUNC 'func1' type_id=37 linkage=global", + "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n" + "\t'p1' type_id=23\n" + "\t'p2' type_id=24", + "[38] VAR 'var1' type_id=23, linkage=global-alloc", + "[39] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=23 offset=4 size=8", + "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1", + "[41] DECL_TAG 'tag2' type_id=36 component_idx=1", + "[42] TYPE_TAG 'tag1' type_id=23", + 
"[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); cleanup: btf__free(btf1); -- cgit v1.2.3-59-g8ed1b From 3b5325186dfad5ad2b2d8f7e8a79662de1b2749d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:27:08 -0700 Subject: selftests/bpf: Add BTF_KIND_ENUM64 unit tests Add unit tests for basic BTF_KIND_ENUM64 encoding. Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062708.3724845-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 36 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_btf.h | 1 + 2 files changed, 37 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8e068e06b3e8..a986ee56c5f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4052,6 +4052,42 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Type tags don't precede modifiers", }, +{ + .descr = "enum64 test #1, unsigned, size 8", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, +{ + .descr = "enum64 test #2, signed, size 4", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, -1, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, 
+ BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 128989bed8b7..38782bd47fdc 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -39,6 +39,7 @@ #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32) #define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ ((bitfield_size) << 24 | (bits_offset)) -- cgit v1.2.3-59-g8ed1b From adc26d134ef3454c3d8ffb75ee6ca20c169b23d0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:27:13 -0700 Subject: selftests/bpf: Test BTF_KIND_ENUM64 for deduplication Add a few unit tests for BTF_KIND_ENUM64 deduplication. 
Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062713.3725409-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 97 +++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index a986ee56c5f7..edb387163baa 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -7016,9 +7016,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, .expect = { .raw_types = { @@ -7046,9 +7049,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, }, { @@ -7509,6 +7515,91 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0tag1\0t\0m"), }, }, +{ + .descr = "dedup: enum64, standalone", + .input = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + 
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum64, fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum64 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [2] full enum64 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [3] full enum64 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [4] fwd enum64 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [5] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [2] full enum64 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [3] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, +}, +{ + .descr = "dedup: enum and enum64, no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), 
BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, }; @@ -7533,6 +7624,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: -- cgit v1.2.3-59-g8ed1b From f4db3dd5284d9e0be2abc2f6e1dbdfe93da5681c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 6 Jun 2022 23:27:18 -0700 Subject: selftests/bpf: Add a test for enum64 value relocations Add a test for enum64 value relocations. The test will be skipped if clang version is 14 or lower since enum64 is only supported from version 15. 
Acked-by: Andrii Nakryiko Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220607062718.3726307-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 58 ++++++++++++++++ .../bpf/progs/btf__core_reloc_enum64val.c | 3 + .../bpf/progs/btf__core_reloc_enum64val___diff.c | 3 + .../btf__core_reloc_enum64val___err_missing.c | 3 + .../btf__core_reloc_enum64val___val3_missing.c | 3 + .../testing/selftests/bpf/progs/core_reloc_types.h | 78 ++++++++++++++++++++++ .../bpf/progs/test_core_reloc_enum64val.c | 70 +++++++++++++++++++ 7 files changed, 218 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 3712dfe1be59..47c1ef117275 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -363,6 +363,25 @@ static int duration = 0; .fails = true, \ } +#define ENUM64VAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enum64val.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_enum64val" + +#define ENUM64VAL_CASE(name, ...) 
{ \ + ENUM64VAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enum64val_output), \ +} + +#define ENUM64VAL_ERR_CASE(name) { \ + ENUM64VAL_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); @@ -831,6 +850,45 @@ static const struct core_reloc_test_case test_cases[] = { .anon_val2 = 0x222, }), ENUMVAL_ERR_CASE(enumval___err_missing), + + /* 64bit enumerator value existence and value relocations */ + ENUM64VAL_CASE(enum64val, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x1ffffffffULL, + .unsigned_val2 = 0x2, + .signed_val1 = 0x1ffffffffLL, + .signed_val2 = -2, + }), + ENUM64VAL_CASE(enum64val___diff, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x101ffffffffULL, + .unsigned_val2 = 0x202ffffffffULL, + .signed_val1 = -101, + .signed_val2 = -202, + }), + ENUM64VAL_CASE(enum64val___val3_missing, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = false, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = false, + .unsigned_val1 = 0x111ffffffffULL, + .unsigned_val2 = 0x222, + .signed_val1 = 0x111ffffffffLL, + .signed_val2 = -222, + }), + ENUM64VAL_ERR_CASE(enum64val___err_missing), }; struct data { diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c new file mode 100644 index 000000000000..888e79db6a77 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c @@ -0,0 +1,3 @@ +#include 
"core_reloc_types.h" + +void f(struct core_reloc_enum64val x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c new file mode 100644 index 000000000000..194749130d87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c new file mode 100644 index 000000000000..3d732d4193e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c new file mode 100644 index 000000000000..17cf5d6a848d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index f9dc9766546e..26e103302c05 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -1117,6 +1117,20 @@ struct core_reloc_enumval_output { int anon_val2; }; +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + enum named_enum { NAMED_ENUM_VAL1 = 1, 
NAMED_ENUM_VAL2 = 2, @@ -1134,6 +1148,23 @@ struct core_reloc_enumval { anon_enum f2; }; +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val { + enum named_unsigned_enum64 f1; + enum named_signed_enum64 f2; +}; + /* differing enumerator values */ enum named_enum___diff { NAMED_ENUM_VAL1___diff = 101, @@ -1152,6 +1183,23 @@ struct core_reloc_enumval___diff { anon_enum___diff f2; }; +enum named_unsigned_enum64___diff { + UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL, + UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL, + UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL, +}; + +enum named_signed_enum64___diff { + SIGNED_ENUM64_VAL1___diff = -101, + SIGNED_ENUM64_VAL2___diff = -202, + SIGNED_ENUM64_VAL3___diff = -303, +}; + +struct core_reloc_enum64val___diff { + enum named_unsigned_enum64___diff f1; + enum named_signed_enum64___diff f2; +}; + /* missing (optional) third enum value */ enum named_enum___val3_missing { NAMED_ENUM_VAL1___val3_missing = 111, @@ -1168,6 +1216,21 @@ struct core_reloc_enumval___val3_missing { anon_enum___val3_missing f2; }; +enum named_unsigned_enum64___val3_missing { + UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL, + UNSIGNED_ENUM64_VAL2___val3_missing = 0x222, +}; + +enum named_signed_enum64___val3_missing { + SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL, + SIGNED_ENUM64_VAL2___val3_missing = -222, +}; + +struct core_reloc_enum64val___val3_missing { + enum named_unsigned_enum64___val3_missing f1; + enum named_signed_enum64___val3_missing f2; +}; + /* missing (mandatory) second enum value, should fail */ enum named_enum___err_missing { NAMED_ENUM_VAL1___err_missing = 1, @@ -1183,3 +1246,18 @@ struct core_reloc_enumval___err_missing { enum named_enum___err_missing f1; 
anon_enum___err_missing f2; }; + +enum named_unsigned_enum64___err_missing { + UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL, +}; + +enum named_signed_enum64___err_missing { + SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL, + SIGNED_ENUM64_VAL3___err_missing = -3, +}; + +struct core_reloc_enum64val___err_missing { + enum named_unsigned_enum64___err_missing f1; + enum named_signed_enum64___err_missing f2; +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c new file mode 100644 index 000000000000..63147fbfae6e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enum64val(void *ctx) +{ +#if __clang_major__ >= 15 + struct core_reloc_enum64val_output *out = (void *)&data.out; + enum named_unsigned_enum64 named_unsigned = 0; + enum named_signed_enum64 named_signed = 0; + + out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1); + 
out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2); + out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3); + out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2); + out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3); + + out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2); + out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2); + /* NAMED_ENUM64_VAL3 value is optional */ + +#else + data.skip = true; +#endif + + return 0; +} -- cgit v1.2.3-59-g8ed1b From 89eda98428ce10f8df110d60aa934aa5c5170686 Mon Sep 17 00:00:00 2001 From: Feng Zhou Date: Fri, 10 Jun 2022 10:33:08 +0800 Subject: selftest/bpf/benchs: Add bpf_map benchmark Add benchmark for hash_map to reproduce the worst case that non-stop update when map's free is zero. Just like this: ./run_bench_bpf_hashmap_full_update.sh Setting up benchmark 'bpf-hashmap-ful-update'... Benchmark 'bpf-hashmap-ful-update' started. 1:hash_map_full_perf 555830 events per sec ... 
Signed-off-by: Feng Zhou Link: https://lore.kernel.org/r/20220610023308.93798-3-zhoufeng.zf@bytedance.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/bench.c | 2 + .../bpf/benchs/bench_bpf_hashmap_full_update.c | 96 ++++++++++++++++++++++ .../benchs/run_bench_bpf_hashmap_full_update.sh | 11 +++ .../bpf/progs/bpf_hashmap_full_update_bench.c | 40 +++++++++ 5 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh create mode 100644 tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d3c8c8f558a..8ad7a733a505 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -560,6 +560,7 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h +$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -571,7 +572,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_ringbufs.o \ $(OUTPUT)/bench_bloom_filter_map.o \ $(OUTPUT)/bench_bpf_loop.o \ - $(OUTPUT)/bench_strncmp.o + $(OUTPUT)/bench_strncmp.o \ + $(OUTPUT)/bench_bpf_hashmap_full_update.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f061cc20e776..d8aa62be996b 100644 --- a/tools/testing/selftests/bpf/bench.c +++ 
b/tools/testing/selftests/bpf/bench.c @@ -396,6 +396,7 @@ extern const struct bench bench_hashmap_with_bloom; extern const struct bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; +extern const struct bench bench_bpf_hashmap_full_update; static const struct bench *benchs[] = { &bench_count_global, @@ -430,6 +431,7 @@ static const struct bench *benchs[] = { &bench_bpf_loop, &bench_strncmp_no_helper, &bench_strncmp_helper, + &bench_bpf_hashmap_full_update, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c new file mode 100644 index 000000000000..cec51e0ff4b8 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include +#include "bench.h" +#include "bpf_hashmap_full_update_bench.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_full_update_bench *skel; +} ctx; + +#define MAX_LOOP_NUM 10000 + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd, i, max_entries; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_full_update_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach 
program!\n"); + exit(1); + } + + /* fill hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench); + for (i = 0; i < max_entries; i++) + bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); +} + +void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + int i; + + for (i = 0; i < nr_cpus; i++) { + u64 time = ctx.skel->bss->percpu_time[i]; + + if (!time) + continue; + + printf("%d:hash_map_full_perf %lld events per sec\n", + i, ctx.skel->bss->nr_loops * 1000000000ll / time); + } +} + +const struct bench bench_bpf_hashmap_full_update = { + .name = "bpf-hashmap-ful-update", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh new file mode 100755 index 000000000000..1e2de838f9fa --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update) +printf "$summary" +printf "\n" diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c new file mode 100644 index 000000000000..56957557e3e1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define MAX_ENTRIES 
1000 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} hash_map_bench SEC(".maps"); + +u64 __attribute__((__aligned__(256))) percpu_time[256]; +u64 nr_loops; + +static int loop_update_callback(__u32 index, u32 *key) +{ + u64 init_val = 1; + + bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 key = cpu + MAX_ENTRIES; + u64 start_time = bpf_ktime_get_ns(); + + bpf_loop(nr_loops, loop_update_callback, &key, 0); + percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time; + return 0; +} -- cgit v1.2.3-59-g8ed1b From 96752e1ec0e0d763ccb05dfd0b6efe43a1a74f1f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 13 Jun 2022 16:34:49 -0700 Subject: selftests/bpf: Fix test_varlen verification failure with latest llvm With latest llvm15, test_varlen failed with the following verifier log: 17: (85) call bpf_probe_read_kernel_str#115 ; R0_w=scalar(smin=-4095,smax=256) 18: (bf) r1 = r0 ; R0_w=scalar(id=1,smin=-4095,smax=256) R1_w=scalar(id=1,smin=-4095,smax=256) 19: (67) r1 <<= 32 ; R1_w=scalar(smax=1099511627776,umax=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min=0,s32_max=0,u32_max=) 20: (bf) r2 = r1 ; R1_w=scalar(id=2,smax=1099511627776,umax=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min=0,s32_max=0,u32) 21: (c7) r2 s>>= 32 ; R2=scalar(smin=-2147483648,smax=256) ; if (len >= 0) { 22: (c5) if r2 s< 0x0 goto pc+7 ; R2=scalar(umax=256,var_off=(0x0; 0x1ff)) ; payload4_len1 = len; 23: (18) r2 = 0xffffc90000167418 ; R2_w=map_value(off=1048,ks=4,vs=1572,imm=0) 25: (63) *(u32 *)(r2 +0) = r0 ; R0=scalar(id=1,smin=-4095,smax=256) R2_w=map_value(off=1048,ks=4,vs=1572,imm=0) 26: (77) r1 >>= 32 ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) ; payload += len; 27: (18) r6 = 0xffffc90000167424 ; 
R6_w=map_value(off=1060,ks=4,vs=1572,imm=0) 29: (0f) r6 += r1 ; R1_w=Pscalar(umax=4294967295,var_off=(0x0; 0xffffffff)) R6_w=map_value(off=1060,ks=4,vs=1572,umax=4294967295,var_off=(0) ; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); 30: (bf) r1 = r6 ; R1_w=map_value(off=1060,ks=4,vs=1572,umax=4294967295,var_off=(0x0; 0xffffffff)) R6_w=map_value(off=1060,ks=4,vs=1572,um) 31: (b7) r2 = 256 ; R2_w=256 32: (18) r3 = 0xffffc90000164100 ; R3_w=map_value(off=256,ks=4,vs=1056,imm=0) 34: (85) call bpf_probe_read_kernel_str#115 R1 unbounded memory access, make sure to bounds check any such access processed 27 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 1 -- END PROG LOAD LOG -- libbpf: failed to load program 'handler32_signed' The failure is due to 20: (bf) r2 = r1 ; R1_w=scalar(id=2,smax=1099511627776,umax=18446744069414584320,var_off=(0x0; 0xffffffff00000000),s32_min=0,s32_max=0,u32) 21: (c7) r2 s>>= 32 ; R2=scalar(smin=-2147483648,smax=256) 22: (c5) if r2 s< 0x0 goto pc+7 ; R2=scalar(umax=256,var_off=(0x0; 0x1ff)) 26: (77) r1 >>= 32 ; R1_w=scalar(umax=4294967295,var_off=(0x0; 0xffffffff)) 29: (0f) r6 += r1 ; R1_w=Pscalar(umax=4294967295,var_off=(0x0; 0xffffffff)) R6_w=map_value(off=1060,ks=4,vs=1572,umax=4294967295,var_off=(0) where r1 has a more conservative value range compared to r2 and r1 is used later. In llvm, commit [1] triggered the above code generation and caused the verification failure. It may take a while for llvm to address this issue. In the meantime, let us change the variable 'len' type to 'long' and adjust the condition accordingly. Tested with llvm14 and latest llvm15, both worked fine. 
[1] https://reviews.llvm.org/D126647 Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220613233449.2860753-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/test_varlen.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 913acdffd90f..3987ff174f1f 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -41,20 +41,20 @@ int handler64_unsigned(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload1; - u64 len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) return 0; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len1 = len; } len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len2 = len; } @@ -123,7 +123,7 @@ int handler32_signed(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload4; - int len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) -- cgit v1.2.3-59-g8ed1b From 3831cd1f9ff627734096f22d8e37f72a5cabf92e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 13 Jun 2022 22:55:26 -0700 Subject: selftests/bpf: Avoid skipping certain subtests Commit 704c91e59fe0 ('selftests/bpf: Test "bpftool gen min_core_btf"') added a test test_core_btfgen to test core relocation with btf generated with 'bpftool gen min_core_btf'. Currently, among 76 subtests, 25 are skipped. ... 
#46/69 core_reloc_btfgen/enumval:OK #46/70 core_reloc_btfgen/enumval___diff:OK #46/71 core_reloc_btfgen/enumval___val3_missing:OK #46/72 core_reloc_btfgen/enumval___err_missing:SKIP #46/73 core_reloc_btfgen/enum64val:OK #46/74 core_reloc_btfgen/enum64val___diff:OK #46/75 core_reloc_btfgen/enum64val___val3_missing:OK #46/76 core_reloc_btfgen/enum64val___err_missing:SKIP ... #46 core_reloc_btfgen:SKIP Summary: 1/51 PASSED, 25 SKIPPED, 0 FAILED Alexei found that, in the above, core_reloc_btfgen/enum64val___err_missing should not be skipped. Currently, the core_reloc tests have some negative tests. In Commit 704c91e59fe0, for core_reloc_btfgen, all negative tests are skipped with the following condition: if (!test_case->btf_src_file || test_case->fails) { test__skip(); continue; } This is too conservative. Negative tests that do not fail mkstemp() and run_btfgen() should not be skipped. There are a few negative tests that do indeed fail run_btfgen(), and this patch adds 'run_btfgen_fails' to mark these tests so that they can be skipped for btfgen tests. With this, we have ... #46/69 core_reloc_btfgen/enumval:OK #46/70 core_reloc_btfgen/enumval___diff:OK #46/71 core_reloc_btfgen/enumval___val3_missing:OK #46/72 core_reloc_btfgen/enumval___err_missing:OK #46/73 core_reloc_btfgen/enum64val:OK #46/74 core_reloc_btfgen/enum64val___diff:OK #46/75 core_reloc_btfgen/enum64val___val3_missing:OK #46/76 core_reloc_btfgen/enum64val___err_missing:OK ... Summary: 1/62 PASSED, 14 SKIPPED, 0 FAILED In total, 14 subtests are skipped instead of 25. 
Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220614055526.628299-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 47c1ef117275..2f92feb809be 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -84,6 +84,7 @@ static int duration = 0; #define NESTING_ERR_CASE(name) { \ NESTING_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ @@ -258,12 +259,14 @@ static int duration = 0; BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ "probed:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .raw_tp_name = "sys_enter", \ .prog_name = "test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .prog_name = "test_core_bitfields_direct", \ } @@ -304,6 +307,7 @@ static int duration = 0; #define SIZE_ERR_CASE(name) { \ SIZE_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define TYPE_BASED_CASE_COMMON(name) \ @@ -396,6 +400,7 @@ struct core_reloc_test_case { const char *output; int output_len; bool fails; + bool run_btfgen_fails; bool needs_testmod; bool relaxed_core_relocs; const char *prog_name; @@ -952,7 +957,7 @@ static void run_core_reloc_tests(bool use_btfgen) /* generate a "minimal" BTF file and use it as source */ if (use_btfgen) { - if (!test_case->btf_src_file || test_case->fails) { + if (!test_case->btf_src_file || test_case->run_btfgen_fails) { test__skip(); continue; } -- cgit v1.2.3-59-g8ed1b From 3e6fe5ce4d4860c3a111c246fddc6f31492f4fb0 Mon Sep 17 00:00:00 
2001 From: Andrii Nakryiko Date: Wed, 15 Jun 2022 22:55:43 -0700 Subject: libbpf: Fix internal USDT address translation logic for shared libraries Perform the same virtual address to file offset translation that libbpf is doing for executable ELF binaries also for shared libraries. Currently libbpf is making a simplifying and sometimes wrong assumption that for shared libraries relative virtual addresses inside ELF are always equal to file offsets. Unfortunately, this is not always the case with LLVM's lld linker, which now by default generates quite more complicated ELF segments layout. E.g., for liburandom_read.so from selftests/bpf, here's an excerpt from readelf output listing ELF segments (a.k.a. program headers): Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align PHDR 0x000040 0x0000000000000040 0x0000000000000040 0x0001f8 0x0001f8 R 0x8 LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0005e4 0x0005e4 R 0x1000 LOAD 0x0005f0 0x00000000000015f0 0x00000000000015f0 0x000160 0x000160 R E 0x1000 LOAD 0x000750 0x0000000000002750 0x0000000000002750 0x000210 0x000210 RW 0x1000 LOAD 0x000960 0x0000000000003960 0x0000000000003960 0x000028 0x000029 RW 0x1000 Compare that to what is generated by GNU ld (or LLVM lld's with extra -znoseparate-code argument which disables this cleverness in the name of file size reduction): Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000550 0x000550 R 0x1000 LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000131 0x000131 R E 0x1000 LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x0000ac 0x0000ac R 0x1000 LOAD 0x002dc0 0x0000000000003dc0 0x0000000000003dc0 0x000262 0x000268 RW 0x1000 You can see from the first example above that for executable (Flg == "R E") PT_LOAD segment (LOAD #2), Offset doesn't match VirtAddr columns. And it does in the second case (GNU ld output). 
This is important because all the addresses, including USDT specs, operate in a virtual address space, while the kernel expects file offsets when performing uprobe attach. So such mismatches have to be properly taken care of and compensated by libbpf, which is what this patch is fixing. The patch also clarifies a few function and variable names, as well as updates comments to reflect this important distinction (virtaddr vs file offset) and to emphasize that shared libraries are not all that different from executables in this regard. This patch also changes selftests/bpf Makefile to force urandom_read and liburandom_read.so to be built with Clang and LLVM's lld (and explicitly request this ELF file size optimization through -znoseparate-code linker parameter) to validate libbpf logic and ensure regressions don't happen in the future. I've bundled these selftests changes together with libbpf changes to keep the above description tied with both libbpf and selftests changes. Fixes: 74cc6311cec9 ("libbpf: Add USDT notes parsing and resolution logic") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220616055543.3285835-1-andrii@kernel.org --- tools/lib/bpf/usdt.c | 123 ++++++++++++++++++----------------- tools/testing/selftests/bpf/Makefile | 14 ++-- 2 files changed, 72 insertions(+), 65 deletions(-) (limited to 'tools/testing') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index f1c9339cfbbc..5159207cbfd9 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz return 0; } -static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) { char path[PATH_MAX], line[PATH_MAX], mode[16]; size_t seg_start, seg_end, seg_off; @@ -531,35 +531,40 @@ err_out: return err; } -static struct elf_seg 
*find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative) +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr) { struct elf_seg *seg; int i; - if (relative) { - /* for shared libraries, address is relative offset and thus - * should be fall within logical offset-based range of - * [offset_start, offset_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start)) - return seg; - } - } else { - /* for binaries, address is absolute and thus should be within - * absolute address range of [seg_start, seg_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->start <= addr && addr < seg->end) - return seg; - } + /* for ELF binaries (both executables and shared libraries), we are + * given virtual address (absolute for executables, relative for + * libraries) which should match address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= virtaddr && virtaddr < seg->end) + return seg; } + return NULL; +} -static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset) +{ + struct elf_seg *seg; + int i; + + /* for VMA segments from /proc/<pid>/maps file, provided "address" is + * actually a file offset, so should be fall within logical + * offset-based range of [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start)) + return seg; + } return NULL; } -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *usdt_note); static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); @@ 
-568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { - size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; - struct elf_seg *segs = NULL, *lib_segs = NULL; + size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *vma_segs = NULL; struct usdt_target *targets = NULL, *target; long base_addr = 0; Elf_Scn *notes_scn, *base_scn; @@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * struct elf_seg *seg = NULL; void *tmp; - err = parse_usdt_note(elf, path, base_addr, &nhdr, - data->d_buf, name_off, desc_off, &note); + err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note); if (err) goto err_out; @@ -654,30 +658,29 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * usdt_rel_ip += base_addr - note.base_addr; } - if (ehdr.e_type == ET_EXEC) { - /* When attaching uprobes (which what USDTs basically - * are) kernel expects a relative IP to be specified, - * so if we are attaching to an executable ELF binary - * (i.e., not a shared library), we need to calculate - * proper relative IP based on ELF's load address - */ - seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */); - if (!seg) { - err = -ESRCH; - pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", - usdt_provider, usdt_name, path, usdt_abs_ip); - goto err_out; - } - if (!seg->is_exec) { - err = -ESRCH; - pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", - path, seg->start, seg->end, usdt_provider, usdt_name, - usdt_abs_ip); - goto err_out; - } + /* When attaching uprobes (which is what USDTs basically are) + * kernel expects file offset to be specified, not a relative 
+ * virtual address, so we need to translate virtual address to + * file offset, for both ET_EXEC and ET_DYN binaries. + */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + /* translate from virtual address to file offset */ + usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset; - usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset); - } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */ + if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) { /* If we don't have BPF cookie support but need to * attach to a shared library, we'll need to know and * record absolute addresses of attach points due to @@ -697,9 +700,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - /* lib_segs are lazily initialized only if necessary */ - if (lib_seg_cnt == 0) { - err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + /* vma_segs are lazily initialized only if necessary */ + if (vma_seg_cnt == 0) { + err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", pid, path, err); @@ -707,7 +710,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * } } - seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", @@ -715,7 +718,7 @@ static int collect_usdt_targets(struct 
usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip; } pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", @@ -723,7 +726,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); - /* Adjust semaphore address to be a relative offset */ + /* Adjust semaphore address to be a file offset */ if (note.sema_addr) { if (!man->has_sema_refcnt) { pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", @@ -732,7 +735,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + seg = find_elf_seg(segs, seg_cnt, note.sema_addr); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", @@ -747,7 +750,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + usdt_sema_off = note.sema_addr - seg->start + seg->offset; pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? 
"exec" : "lib ", @@ -770,7 +773,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; - /* notes->args references strings from Elf itself, so they can + /* notes.args references strings from Elf itself, so they can * be referenced safely until elf_end() call */ target->spec_str = note.args; @@ -788,7 +791,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err_out: free(segs); - free(lib_segs); + free(vma_segs); if (err < 0) free(targets); return err; @@ -1089,8 +1092,8 @@ err_out: /* Parse out USDT ELF note from '.note.stapsdt' section. * Logic inspired by perf's code. */ -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *note) { const char *provider, *name, *args; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8ad7a733a505..e08e8e34e793 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -172,13 +172,15 @@ $(OUTPUT)/%:%.c # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ + -fuse-ld=lld -Wl,-znoseparate-code -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - liburandom_read.so $(LDLIBS) \ - -Wl,-rpath=. 
-Wl,--build-id=sha1 -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -fuse-ld=lld -Wl,-znoseparate-code \ + -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -580,6 +582,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ - $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ + no_alu32 bpf_gcc bpf_testmod.ko \ + liburandom_read.so) .PHONY: docs docs-clean -- cgit v1.2.3-59-g8ed1b From cb3f4a4a462b46eb5487ad806d58e34824c49044 Mon Sep 17 00:00:00 2001 From: Delyan Kratunov Date: Tue, 14 Jun 2022 23:10:44 +0000 Subject: selftests/bpf: add tests for sleepable (uk)probes Add tests that ensure sleepable uprobe programs work correctly. Add tests that ensure sleepable kprobe programs cannot attach. Also add tests that attach both sleepable and non-sleepable uprobe programs to the same location (i.e. same bpf_prog_array). 
Acked-by: Andrii Nakryiko Signed-off-by: Delyan Kratunov Link: https://lore.kernel.org/r/c744e5bb7a5c0703f05444dc41f2522ba3579a48.1655248076.git.delyank@fb.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/attach_probe.c | 49 +++++++++++++++++- .../selftests/bpf/progs/test_attach_probe.c | 60 ++++++++++++++++++++++ 2 files changed, 108 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 08c0601b3e84..0b899d2d8ea7 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -17,6 +17,14 @@ static void trigger_func2(void) asm volatile (""); } +/* attach point for byname sleepable uprobe */ +static void trigger_func3(void) +{ + asm volatile (""); +} + +static char test_data[] = "test_data"; + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -49,9 +57,17 @@ void test_attach_probe(void) if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) return; - skel = test_attach_probe__open_and_load(); + skel = test_attach_probe__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, + BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; @@ -151,6 +167,30 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) goto cleanup; + /* sleepable kprobes should not attach successfully */ + skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); + if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) + goto cleanup; + 
+ /* test sleepable uprobe and uretprobe variants */ + skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3")) + goto cleanup; + + skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) + goto cleanup; + + skel->bss->user_ptr = test_data; + /* trigger & validate kprobe && kretprobe */ usleep(1); @@ -164,6 +204,9 @@ void test_attach_probe(void) /* trigger & validate uprobe attached by name */ trigger_func2(); + /* trigger & validate sleepable uprobe attached by name */ + trigger_func3(); + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); @@ -174,6 +217,10 @@ void test_attach_probe(void) ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); + 
ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index ce9acf4db8d2..f1c88ad368ef 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "bpf_misc.h" int kprobe_res = 0; @@ -17,6 +18,11 @@ int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; int uretprobe_byname2_res = 0; +int uprobe_byname3_sleepable_res = 0; +int uprobe_byname3_res = 0; +int uretprobe_byname3_sleepable_res = 0; +int uretprobe_byname3_res = 0; +void *user_ptr = 0; SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) @@ -32,6 +38,17 @@ int BPF_KPROBE(handle_kprobe_auto) return 0; } +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. 
+ */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 2; + return 0; +} + SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { @@ -93,4 +110,47 @@ int handle_uretprobe_byname2(struct pt_regs *ctx) return 0; } +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +SEC("uprobe.s//proc/self/exe:trigger_func3") +int handle_uprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uprobe_byname3_sleepable_res = 9; + return 0; +} + +/** + * same target as the uprobe.s above to force sleepable and non-sleepable + * programs in the same bpf_prog_array + */ +SEC("uprobe//proc/self/exe:trigger_func3") +int handle_uprobe_byname3(struct pt_regs *ctx) +{ + uprobe_byname3_res = 10; + return 0; +} + +SEC("uretprobe.s//proc/self/exe:trigger_func3") +int handle_uretprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uretprobe_byname3_sleepable_res = 11; + return 0; +} + +SEC("uretprobe//proc/self/exe:trigger_func3") +int handle_uretprobe_byname3(struct pt_regs *ctx) +{ + uretprobe_byname3_res = 12; + return 0; +} + + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From fb5cd0ce70d43b9bf589aa05aaa067350c3d3b26 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:45 +0300 Subject: selftests/bpf: Add selftests for raw syncookie helpers This commit adds selftests for the new BPF helpers: bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6}. xdp_synproxy_kern.c is a BPF program that generates SYN cookies on allowed TCP ports and sends SYNACKs to clients, accelerating synproxy iptables module. xdp_synproxy.c is a userspace control application that allows to configure the following options in runtime: list of allowed ports, MSS, window scale, TTL. 
A selftest is added to prog_tests that leverages the above programs to test the functionality of the new helpers. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-5-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 3 +- .../selftests/bpf/prog_tests/xdp_synproxy.c | 146 ++++ .../selftests/bpf/progs/xdp_synproxy_kern.c | 763 +++++++++++++++++++++ tools/testing/selftests/bpf/xdp_synproxy.c | 418 +++++++++++ 5 files changed, 1330 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c create mode 100644 tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c create mode 100644 tools/testing/selftests/bpf/xdp_synproxy.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 595565eb68c0..ca2f47f45670 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -43,3 +43,4 @@ test_cpp *.tmp xdpxceiver xdp_redirect_multi +xdp_synproxy diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e08e8e34e793..8b30bb743e24 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver xdp_redirect_multi + xdpxceiver xdp_redirect_multi xdp_synproxy TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read @@ -504,6 +504,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ cap_helpers.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ + $(OUTPUT)/xdp_synproxy \ 
ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c new file mode 100644 index 000000000000..d9ee884c2a2b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include +#include +#include + +#define CMD_OUT_BUF_SIZE 1023 + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +#define SYS_OUT(cmd) ({ \ + FILE *f = popen((cmd), "r"); \ + if (!ASSERT_OK_PTR(f, (cmd))) \ + goto out; \ + f; \ +}) + +/* out must be at least `size * 4 + 1` bytes long */ +static void escape_str(char *out, const char *in, size_t size) +{ + static const char *hex = "0123456789ABCDEF"; + size_t i; + + for (i = 0; i < size; i++) { + if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') { + *out++ = in[i]; + } else { + *out++ = '\\'; + *out++ = 'x'; + *out++ = hex[(in[i] >> 4) & 0xf]; + *out++ = hex[in[i] & 0xf]; + } + } + *out++ = '\0'; +} + +static bool expect_str(char *buf, size_t size, const char *str, const char *name) +{ + static char escbuf_expected[CMD_OUT_BUF_SIZE * 4]; + static char escbuf_actual[CMD_OUT_BUF_SIZE * 4]; + static int duration = 0; + bool ok; + + ok = size == strlen(str) && !memcmp(buf, str, size); + + if (!ok) { + escape_str(escbuf_expected, str, strlen(str)); + escape_str(escbuf_actual, buf, size); + } + CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n", + name, escbuf_actual, escbuf_expected); + + return ok; +} + +void test_xdp_synproxy(void) +{ + int server_fd = -1, client_fd = -1, accept_fd = -1; + struct nstoken *ns = NULL; + FILE *ctrl_file = NULL; + char buf[CMD_OUT_BUF_SIZE]; + size_t size; + + SYS("ip netns add synproxy"); + + SYS("ip link add tmp0 type veth peer name 
tmp1"); + SYS("ip link set tmp1 netns synproxy"); + SYS("ip link set tmp0 up"); + SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + + /* When checksum offload is enabled, the XDP program sees wrong + * checksums and drops packets. + */ + SYS("ethtool -K tmp0 tx off"); + /* Workaround required for veth. */ + SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null"); + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + SYS("ip link set lo up"); + SYS("ip link set tmp1 up"); + SYS("ip addr replace 198.18.0.2/24 dev tmp1"); + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS("iptables -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 --single \ + --mss4 1460 --mss6 1440 --wscale 7 --ttl 64"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 0\n", + "initial SYNACKs")) + goto out; + + server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + close_netns(ns); + ns = NULL; + + client_fd = connect_to_fd(server_fd, 10000); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out; + + accept_fd = accept(server_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto out; + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total 
SYNACKs generated: 1\n", + "SYNACKs after connection")) + goto out; + +out: + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); + if (server_fd >= 0) + close(server_fd); + if (ns) + close_netns(ns); + + system("ip link del tmp0"); + system("ip netns del synproxy"); +} diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c new file mode 100644 index 000000000000..53b9865276a4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "vmlinux.h" + +#include +#include +#include + +#define NSEC_PER_SEC 1000000000L + +#define ETH_ALEN 6 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD + +#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) + +#define IP_DF 0x4000 +#define IP_MF 0x2000 +#define IP_OFFSET 0x1fff + +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERM 4 +#define TCPOPT_TIMESTAMP 8 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_TIMESTAMP 10 + +#define TCP_TS_HZ 1000 +#define TS_OPT_WSCALE_MASK 0xf +#define TS_OPT_SACK (1 << 4) +#define TS_OPT_ECN (1 << 5) +#define TSBITS 6 +#define TSMASK (((__u32)1 << TSBITS) - 1) +#define TCP_MAX_WSCALE 14U + +#define IPV4_MAXLEN 60 +#define TCP_MAXLEN 60 + +#define DEFAULT_MSS4 1460 +#define DEFAULT_MSS6 1440 +#define DEFAULT_WSCALE 7 +#define DEFAULT_TTL 64 +#define MAX_ALLOWED_PORTS 8 + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), 
(ptr)) + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 2); +} values SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u16); + __uint(max_entries, MAX_ALLOWED_PORTS); +} allowed_ports SEC(".maps"); + +extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, + struct bpf_sock_tuple *bpf_tuple, + __u32 len_tuple, + struct bpf_ct_opts *opts, + __u32 len_opts) __ksym; + +extern void bpf_ct_release(struct nf_conn *ct) __ksym; + +static __always_inline void swap_eth_addr(__u8 *a, __u8 *b) +{ + __u8 tmp[ETH_ALEN]; + + __builtin_memcpy(tmp, a, ETH_ALEN); + __builtin_memcpy(a, b, ETH_ALEN); + __builtin_memcpy(b, tmp, ETH_ALEN); +} + +static __always_inline __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)~csum; +} + +static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __u32 csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s += proto + len; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s += (proto + len) << 8; +#else +#error Unknown endian +#endif + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __u32 csum) +{ + __u64 sum = csum; + int i; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)saddr->in6_u.u6_addr32[i]; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)daddr->in6_u.u6_addr32[i]; + + /* Don't combine additions to avoid 32-bit overflow. 
*/ + sum += bpf_htonl(len); + sum += bpf_htonl(proto); + + sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); + + return csum_fold((__u32)sum); +} + +static __always_inline __u64 tcp_clock_ns(void) +{ + return bpf_ktime_get_ns(); +} + +static __always_inline __u32 tcp_ns_to_ts(__u64 ns) +{ + return ns / (NSEC_PER_SEC / TCP_TS_HZ); +} + +static __always_inline __u32 tcp_time_stamp_raw(void) +{ + return tcp_ns_to_ts(tcp_clock_ns()); +} + +struct tcpopt_context { + __u8 *ptr; + __u8 *end; + void *data_end; + __be32 *tsecr; + __u8 wscale; + bool option_timestamp; + bool option_sack; +}; + +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 opcode, opsize; + + if (ctx->ptr >= ctx->end) + return 1; + if (ctx->ptr >= ctx->data_end) + return 1; + + opcode = ctx->ptr[0]; + + if (opcode == TCPOPT_EOL) + return 1; + if (opcode == TCPOPT_NOP) { + ++ctx->ptr; + return 0; + } + + if (ctx->ptr + 1 >= ctx->end) + return 1; + if (ctx->ptr + 1 >= ctx->data_end) + return 1; + opsize = ctx->ptr[1]; + if (opsize < 2) + return 1; + + if (ctx->ptr + opsize > ctx->end) + return 1; + + switch (opcode) { + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) + ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + ctx->option_timestamp = true; + /* Client's tsval becomes our tsecr. 
*/ + *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM) + ctx->option_sack = true; + break; + } + + ctx->ptr += opsize; + + return 0; +} + +static int tscookie_tcpopt_parse_batch(__u32 index, void *context) +{ + int i; + + for (i = 0; i < 7; i++) + if (tscookie_tcpopt_parse(context)) + return 1; + return 0; +} + +static __always_inline bool tscookie_init(struct tcphdr *tcp_header, + __u16 tcp_len, __be32 *tsval, + __be32 *tsecr, void *data_end) +{ + struct tcpopt_context loop_ctx = { + .ptr = (__u8 *)(tcp_header + 1), + .end = (__u8 *)tcp_header + tcp_len, + .data_end = data_end, + .tsecr = tsecr, + .wscale = TS_OPT_WSCALE_MASK, + .option_timestamp = false, + .option_sack = false, + }; + u32 cookie; + + bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0); + + if (!loop_ctx.option_timestamp) + return false; + + cookie = tcp_time_stamp_raw() & ~TSMASK; + cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; + if (loop_ctx.option_sack) + cookie |= TS_OPT_SACK; + if (tcp_header->ece && tcp_header->cwr) + cookie |= TS_OPT_ECN; + *tsval = bpf_htonl(cookie); + + return true; +} + +static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, + __u8 *ttl, bool ipv6) +{ + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value && *value != 0) { + if (ipv6) + *mss = (*value >> 32) & 0xffff; + else + *mss = *value & 0xffff; + *wscale = (*value >> 16) & 0xf; + *ttl = (*value >> 24) & 0xff; + return; + } + + *mss = ipv6 ? 
DEFAULT_MSS6 : DEFAULT_MSS4; + *wscale = DEFAULT_WSCALE; + *ttl = DEFAULT_TTL; +} + +static __always_inline void values_inc_synacks(void) +{ + __u32 key = 1; + __u32 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value) + __sync_fetch_and_add(value, 1); +} + +static __always_inline bool check_port_allowed(__u16 port) +{ + __u32 i; + + for (i = 0; i < MAX_ALLOWED_PORTS; i++) { + __u32 key = i; + __u16 *value; + + value = bpf_map_lookup_elem(&allowed_ports, &key); + + if (!value) + break; + /* 0 is a terminator value. Check it first to avoid matching on + * a forbidden port == 0 and returning true. + */ + if (*value == 0) + break; + + if (*value == port) + return true; + } + + return false; +} + +struct header_pointers { + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + __u16 tcp_len; +}; + +static __always_inline int tcp_dissect(void *data, void *data_end, + struct header_pointers *hdr) +{ + hdr->eth = data; + if (hdr->eth + 1 > data_end) + return XDP_DROP; + + switch (bpf_ntohs(hdr->eth->h_proto)) { + case ETH_P_IP: + hdr->ipv6 = NULL; + + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv4 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4)) + return XDP_DROP; + if (hdr->ipv4->version != 4) + return XDP_DROP; + + if (hdr->ipv4->protocol != IPPROTO_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + break; + case ETH_P_IPV6: + hdr->ipv4 = NULL; + + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv6 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv6->version != 6) + return XDP_DROP; + + /* XXX: Extension headers are not supported and could circumvent + * XDP SYN flood protection. + */ + if (hdr->ipv6->nexthdr != NEXTHDR_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + break; + default: + /* XXX: VLANs will circumvent XDP SYN flood protection. 
*/ + return XDP_PASS; + } + + if (hdr->tcp + 1 > data_end) + return XDP_DROP; + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_DROP; + + return XDP_TX; +} + +static __always_inline int tcp_lookup(struct xdp_md *ctx, struct header_pointers *hdr) +{ + struct bpf_ct_opts ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + __u32 tup_size; + + if (hdr->ipv4) { + /* TCP doesn't normally use fragments, and XDP can't reassemble + * them. + */ + if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + return XDP_DROP; + + tup.ipv4.saddr = hdr->ipv4->saddr; + tup.ipv4.daddr = hdr->ipv4->daddr; + tup.ipv4.sport = hdr->tcp->source; + tup.ipv4.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv4); + } else if (hdr->ipv6) { + __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr)); + __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = hdr->tcp->source; + tup.ipv6.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv6); + } else { + /* The verifier can't track that either ipv4 or ipv6 is not + * NULL. 
+ */ + return XDP_ABORTED; + } + ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + if (ct) { + unsigned long status = ct->status; + + bpf_ct_release(ct); + if (status & IPS_CONFIRMED_BIT) + return XDP_PASS; + } else if (ct_lookup_opts.error != -ENOENT) { + return XDP_ABORTED; + } + + /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */ + return XDP_TX; +} + +static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss, + __u8 wscale) +{ + __be32 *start = buf; + + *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + + if (!tsopt) + return buf - start; + + if (tsopt[0] & bpf_htonl(1 << 4)) + *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + else + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *buf++ = tsopt[0]; + *buf++ = tsopt[1]; + + if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf)) + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + wscale); + + return buf - start; +} + +static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header, + __u32 cookie, __be32 *tsopt, + __u16 mss, __u8 wscale) +{ + void *tcp_options; + + tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (tsopt && (tsopt[0] & bpf_htonl(1 << 5))) + tcp_flag_word(tcp_header) |= TCP_FLAG_ECE; + tcp_header->doff = 5; /* doff is part of tcp_flag_word. */ + swap(tcp_header->source, tcp_header->dest); + tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1); + tcp_header->seq = bpf_htonl(cookie); + tcp_header->window = 0; + tcp_header->urg_ptr = 0; + tcp_header->check = 0; /* Calculate checksum later. 
*/ + + tcp_options = (void *)(tcp_header + 1); + tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale); +} + +static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, false); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv4->saddr, hdr->ipv4->daddr); + hdr->ipv4->check = 0; /* Calculate checksum later. */ + hdr->ipv4->tos = 0; + hdr->ipv4->id = 0; + hdr->ipv4->ttl = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len); +} + +static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, true); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv6->saddr, hdr->ipv6->daddr); + *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000); + hdr->ipv6->hop_limit = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len); +} + +static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, + struct xdp_md *ctx, + void *data, void *data_end) +{ + __u32 old_pkt_size, new_pkt_size; + /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the + * BPF verifier if tsopt is not volatile. Volatile forces it to store + * the pointer value and use it directly, otherwise tcp_mkoptions is + * (mis)compiled like this: + * if (!tsopt) + * return buf - start; + * reg = stored_return_value_of_tscookie_init; + * if (reg) + * tsopt = tsopt_buf; + * else + * tsopt = NULL; + * ... + * *buf++ = tsopt[1]; + * It creates a dead branch where tsopt is assigned NULL, but the + * verifier can't prove it's dead and blocks the program. 
+ */ + __be32 * volatile tsopt = NULL; + __be32 tsopt_buf[2] = {}; + __u16 ip_len; + __u32 cookie; + __s64 value; + + /* Checksum is not yet verified, but both checksum failure and TCP + * header checks return XDP_DROP, so the order doesn't matter. + */ + if (hdr->tcp->fin || hdr->tcp->rst) + return XDP_DROP; + + /* Issue SYN cookies on allowed ports, drop SYN packets on blocked + * ports. + */ + if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest))) + return XDP_DROP; + + if (hdr->ipv4) { + /* Check the IPv4 and TCP checksums before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_fold(value) != 0) + return XDP_DROP; /* Bad IPv4 checksum. */ + + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv4); + + value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp, + hdr->tcp_len); + } else if (hdr->ipv6) { + /* Check the TCP checksum before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv6); + + value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp, + hdr->tcp_len); + } else { + return XDP_ABORTED; + } + + if (value < 0) + return XDP_ABORTED; + cookie = (__u32)value; + + if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, + &tsopt_buf[0], &tsopt_buf[1], data_end)) + tsopt = tsopt_buf; + + /* Check that there is enough space for a SYNACK. It also covers + * the check that the destination of the __builtin_memmove below + * doesn't overflow. 
+ */ + if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + if (hdr->ipv4) { + if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) { + struct tcphdr *new_tcp_header; + + new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4); + __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp)); + hdr->tcp = new_tcp_header; + + hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4; + } + + tcpv4_gen_synack(hdr, cookie, tsopt); + } else if (hdr->ipv6) { + tcpv6_gen_synack(hdr, cookie, tsopt); + } else { + return XDP_ABORTED; + } + + /* Recalculate checksums. */ + hdr->tcp->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (hdr->ipv4) { + hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr, + hdr->ipv4->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + + hdr->ipv4->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0); + if (value < 0) + return XDP_ABORTED; + hdr->ipv4->check = csum_fold(value); + } else if (hdr->ipv6) { + hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr, + &hdr->ipv6->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + } else { + return XDP_ABORTED; + } + + /* Set the new packet size. 
*/ + old_pkt_size = data_end - data; + new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; + if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) + return XDP_ABORTED; + + values_inc_synacks(); + + return XDP_TX; +} + +static __always_inline int syncookie_handle_ack(struct header_pointers *hdr) +{ + int err; + + if (hdr->tcp->rst) + return XDP_DROP; + + if (hdr->ipv4) + err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp); + else if (hdr->ipv6) + err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp); + else + return XDP_ABORTED; + if (err) + return XDP_DROP; + + return XDP_PASS; +} + +SEC("xdp") +int syncookie_xdp(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct header_pointers hdr; + __s64 value; + int ret; + + struct bpf_ct_opts ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + + ret = tcp_dissect(data, data_end, &hdr); + if (ret != XDP_TX) + return ret; + + ret = tcp_lookup(ctx, &hdr); + if (ret != XDP_TX) + return ret; + + /* Packet is TCP and doesn't belong to an established connection. */ + + if ((hdr.tcp->syn ^ hdr.tcp->ack) != 1) + return XDP_DROP; + + /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr.tcp_len + * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. + */ + if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr.tcp_len)) + return XDP_ABORTED; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + if (hdr.ipv4) { + hdr.eth = data; + hdr.ipv4 = (void *)hdr.eth + sizeof(*hdr.eth); + /* IPV4_MAXLEN is needed when calculating checksum. + * At least sizeof(struct iphdr) is needed here to access ihl. 
+ */ + if ((void *)hdr.ipv4 + IPV4_MAXLEN > data_end) + return XDP_ABORTED; + hdr.tcp = (void *)hdr.ipv4 + hdr.ipv4->ihl * 4; + } else if (hdr.ipv6) { + hdr.eth = data; + hdr.ipv6 = (void *)hdr.eth + sizeof(*hdr.eth); + hdr.tcp = (void *)hdr.ipv6 + sizeof(*hdr.ipv6); + } else { + return XDP_ABORTED; + } + + if ((void *)hdr.tcp + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + /* We run out of registers, tcp_len gets spilled to the stack, and the + * verifier forgets its min and max values checked above in tcp_dissect. + */ + hdr.tcp_len = hdr.tcp->doff * 4; + if (hdr.tcp_len < sizeof(*hdr.tcp)) + return XDP_ABORTED; + + return hdr.tcp->syn ? syncookie_handle_syn(&hdr, ctx, data, data_end) : + syncookie_handle_ack(&hdr); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c new file mode 100644 index 000000000000..4653d4655b5f --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int ifindex; +static __u32 attached_prog_id; + +static void noreturn cleanup(int sig) +{ + DECLARE_LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + int prog_fd; + int err; + + if (attached_prog_id == 0) + exit(0); + + prog_fd = bpf_prog_get_fd_by_id(attached_prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + err = bpf_xdp_attach(ifindex, -1, 0, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err)); + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } else { + opts.old_prog_fd = prog_fd; + err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts); + close(prog_fd); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err)); + /* Not an error if already replaced by someone else. */ + if (err != -EEXIST) { + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } + } + exit(0); +} + +static noreturn void usage(const char *progname) +{ + fprintf(stderr, "Usage: %s [--iface |--prog ] [--mss4 --mss6 --wscale --ttl ] [--ports ,,...] 
[--single]\n", + progname); + exit(1); +} + +static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit) +{ + unsigned long res; + char *endptr; + + errno = 0; + res = strtoul(arg, &endptr, 10); + if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit) + usage(progname); + + return res; +} + +static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id, + __u64 *tcpipopts, char **ports, bool *single) +{ + static struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "iface", required_argument, NULL, 'i' }, + { "prog", required_argument, NULL, 'x' }, + { "mss4", required_argument, NULL, 4 }, + { "mss6", required_argument, NULL, 6 }, + { "wscale", required_argument, NULL, 'w' }, + { "ttl", required_argument, NULL, 't' }, + { "ports", required_argument, NULL, 'p' }, + { "single", no_argument, NULL, 's' }, + { NULL, 0, NULL, 0 }, + }; + unsigned long mss4, mss6, wscale, ttl; + unsigned int tcpipopts_mask = 0; + + if (argc < 2) + usage(argv[0]); + + *ifindex = 0; + *prog_id = 0; + *tcpipopts = 0; + *ports = NULL; + *single = false; + + while (true) { + int opt; + + opt = getopt_long(argc, argv, "", long_options, NULL); + if (opt == -1) + break; + + switch (opt) { + case 'h': + usage(argv[0]); + break; + case 'i': + *ifindex = if_nametoindex(optarg); + if (*ifindex == 0) + usage(argv[0]); + break; + case 'x': + *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX); + if (*prog_id == 0) + usage(argv[0]); + break; + case 4: + mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 0; + break; + case 6: + mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 1; + break; + case 'w': + wscale = parse_arg_ul(argv[0], optarg, 14); + tcpipopts_mask |= 1 << 2; + break; + case 't': + ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX); + tcpipopts_mask |= 1 << 3; + break; + case 'p': + *ports = optarg; + break; + case 's': + *single = true; + break; + 
default: + usage(argv[0]); + } + } + if (optind < argc) + usage(argv[0]); + + if (tcpipopts_mask == 0xf) { + if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0) + usage(argv[0]); + *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4; + } else if (tcpipopts_mask != 0) { + usage(argv[0]); + } + + if (*ifindex != 0 && *prog_id != 0) + usage(argv[0]); + if (*ifindex == 0 && *prog_id == 0) + usage(argv[0]); +} + +static int syncookie_attach(const char *argv0, unsigned int ifindex) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char xdp_filename[PATH_MAX]; + struct bpf_program *prog; + struct bpf_object *obj; + int prog_fd; + int err; + + snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0); + obj = bpf_object__open_file(xdp_filename, NULL); + err = libbpf_get_error(obj); + if (err < 0) { + fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err)); + return err; + } + + err = bpf_object__load(obj); + if (err < 0) { + fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err)); + return err; + } + + prog = bpf_object__find_program_by_name(obj, "syncookie_xdp"); + if (!prog) { + fprintf(stderr, "Error: bpf_object__find_program_by_name: program syncookie_xdp was not found\n"); + return -ENOENT; + } + + prog_fd = bpf_program__fd(prog); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err < 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + goto out; + } + attached_prog_id = info.id; + signal(SIGINT, cleanup); + signal(SIGTERM, cleanup); + err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_UPDATE_IF_NOEXIST, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err)); + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + attached_prog_id = 0; + goto out; + } + err = 0; +out: + bpf_object__close(obj); + return err; +} + +static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd) +{ + struct 
bpf_prog_info prog_info; + __u32 map_ids[8]; + __u32 info_len; + int prog_fd; + int err; + int i; + + *values_map_fd = -1; + *ports_map_fd = -1; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + return prog_fd; + } + + prog_info = (struct bpf_prog_info) { + .nr_map_ids = 8, + .map_ids = (__u64)map_ids, + }; + info_len = sizeof(prog_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + goto out; + } + + if (prog_info.type != BPF_PROG_TYPE_XDP) { + fprintf(stderr, "Error: BPF prog type is not BPF_PROG_TYPE_XDP\n"); + err = -ENOENT; + goto out; + } + if (prog_info.nr_map_ids < 2) { + fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n", + prog_info.nr_map_ids); + err = -ENOENT; + goto out; + } + + for (i = 0; i < prog_info.nr_map_ids; i++) { + struct bpf_map_info map_info = {}; + int map_fd; + + err = bpf_map_get_fd_by_id(map_ids[i]); + if (err < 0) { + fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err)); + goto err_close_map_fds; + } + map_fd = err; + + info_len = sizeof(map_info); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + close(map_fd); + goto err_close_map_fds; + } + if (strcmp(map_info.name, "values") == 0) { + *values_map_fd = map_fd; + continue; + } + if (strcmp(map_info.name, "allowed_ports") == 0) { + *ports_map_fd = map_fd; + continue; + } + close(map_fd); + } + + if (*values_map_fd != -1 && *ports_map_fd != -1) { + err = 0; + goto out; + } + + err = -ENOENT; + +err_close_map_fds: + if (*values_map_fd != -1) + close(*values_map_fd); + if (*ports_map_fd != -1) + close(*ports_map_fd); + *values_map_fd = -1; + *ports_map_fd = -1; + +out: + close(prog_fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int 
values_map_fd, ports_map_fd; + __u64 tcpipopts; + bool firstiter; + __u64 prevcnt; + __u32 prog_id; + char *ports; + bool single; + int err = 0; + + parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports, &single); + + if (prog_id == 0) { + err = bpf_xdp_query_id(ifindex, 0, &prog_id); + if (err < 0) { + fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n", strerror(-err)); + goto out; + } + if (prog_id == 0) { + err = syncookie_attach(argv[0], ifindex); + if (err < 0) + goto out; + prog_id = attached_prog_id; + } + } + + err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd); + if (err < 0) + goto out; + + if (ports) { + __u16 port_last = 0; + __u32 port_idx = 0; + char *p = ports; + + fprintf(stderr, "Replacing allowed ports\n"); + + while (p && *p != '\0') { + char *token = strsep(&p, ","); + __u16 port; + + port = parse_arg_ul(argv[0], token, UINT16_MAX); + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add port %u (index %u)\n", + port, port_idx); + goto out_close_maps; + } + fprintf(stderr, "Added port %u\n", port); + port_idx++; + } + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n", + port_idx); + goto out_close_maps; + } + } + + if (tcpipopts) { + __u32 key = 0; + + fprintf(stderr, "Replacing TCP/IP options\n"); + + err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + } + + if ((ports || tcpipopts) && attached_prog_id == 0 && !single) + goto out_close_maps; + + prevcnt = 0; + firstiter = true; + while (true) { + __u32 key = 1; + __u64 value; + + err = bpf_map_lookup_elem(values_map_fd, 
&key, &value); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + if (firstiter) { + prevcnt = value; + firstiter = false; + } + if (single) { + printf("Total SYNACKs generated: %llu\n", value); + break; + } + printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value); + prevcnt = value; + sleep(1); + } + +out_close_maps: + close(values_map_fd); + close(ports_map_fd); +out: + return err == 0 ? 0 : 1; +} -- cgit v1.2.3-59-g8ed1b From 784d5dc0efc28bd0a52ccfedb707eba71d8bc8af Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 15 Jun 2022 16:48:47 +0300 Subject: selftests/bpf: Add selftests for raw syncookie helpers in TC mode This commit extends selftests for the new BPF helpers bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6} to also test the TC BPF functionality added in the previous commit. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/20220615134847.3753567-7-maximmi@nvidia.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/xdp_synproxy.c | 55 ++++++-- .../selftests/bpf/progs/xdp_synproxy_kern.c | 142 +++++++++++++++------ tools/testing/selftests/bpf/xdp_synproxy.c | 96 ++++++++++---- 3 files changed, 224 insertions(+), 69 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index d9ee884c2a2b..fb77a123fe89 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +#define _GNU_SOURCE #include #include #include @@ -12,9 +13,11 @@ goto out; \ }) -#define SYS_OUT(cmd) ({ \ - FILE *f = popen((cmd), "r"); \ - if (!ASSERT_OK_PTR(f, (cmd))) \ +#define SYS_OUT(cmd, ...) 
({ \ + char buf[1024]; \ + snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ + FILE *f = popen(buf, "r"); \ + if (!ASSERT_OK_PTR(f, buf)) \ goto out; \ f; \ }) @@ -57,9 +60,10 @@ static bool expect_str(char *buf, size_t size, const char *str, const char *name return ok; } -void test_xdp_synproxy(void) +static void test_synproxy(bool xdp) { int server_fd = -1, client_fd = -1, accept_fd = -1; + char *prog_id, *prog_id_end; struct nstoken *ns = NULL; FILE *ctrl_file = NULL; char buf[CMD_OUT_BUF_SIZE]; @@ -76,8 +80,9 @@ void test_xdp_synproxy(void) * checksums and drops packets. */ SYS("ethtool -K tmp0 tx off"); - /* Workaround required for veth. */ - SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null"); + if (xdp) + /* Workaround required for veth. */ + SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null"); ns = open_netns("synproxy"); if (!ASSERT_OK_PTR(ns, "setns")) @@ -97,14 +102,34 @@ void test_xdp_synproxy(void) SYS("iptables -t filter -A INPUT \ -i tmp1 -m state --state INVALID -j DROP"); - ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 --single \ - --mss4 1460 --mss6 1440 --wscale 7 --ttl 64"); + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ + --single --mss4 1460 --mss6 1440 \ + --wscale 7 --ttl 64%s", xdp ? 
"" : " --tc"); size = fread(buf, 1, sizeof(buf), ctrl_file); pclose(ctrl_file); if (!expect_str(buf, size, "Total SYNACKs generated: 0\n", "initial SYNACKs")) goto out; + if (!xdp) { + ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + prog_id = memmem(buf, size, " id ", 4); + if (!ASSERT_OK_PTR(prog_id, "find prog id")) + goto out; + prog_id += 4; + if (!ASSERT_LT(prog_id, buf + size, "find prog id begin")) + goto out; + prog_id_end = prog_id; + while (prog_id_end < buf + size && *prog_id_end >= '0' && + *prog_id_end <= '9') + prog_id_end++; + if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end")) + goto out; + *prog_id_end = '\0'; + } + server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0); if (!ASSERT_GE(server_fd, 0, "start_server")) goto out; @@ -124,7 +149,11 @@ void test_xdp_synproxy(void) if (!ASSERT_OK_PTR(ns, "setns")) goto out; - ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single"); + if (xdp) + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single"); + else + ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single", + prog_id); size = fread(buf, 1, sizeof(buf), ctrl_file); pclose(ctrl_file); if (!expect_str(buf, size, "Total SYNACKs generated: 1\n", @@ -144,3 +173,11 @@ out: system("ip link del tmp0"); system("ip netns del synproxy"); } + +void test_xdp_synproxy(void) +{ + if (test__start_subtest("xdp")) + test_synproxy(true); + if (test__start_subtest("tc")) + test_synproxy(false); +} diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 53b9865276a4..9fd62e94b5e6 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -7,6 +7,9 @@ #include #include +#define TC_ACT_OK 0 +#define TC_ACT_SHOT 2 + #define NSEC_PER_SEC 1000000000L #define ETH_ALEN 6 @@ -80,6 +83,12 @@ extern struct nf_conn 
*bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_ct_opts *opts, __u32 len_opts) __ksym; +extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, + struct bpf_sock_tuple *bpf_tuple, + u32 len_tuple, + struct bpf_ct_opts *opts, + u32 len_opts) __ksym; + extern void bpf_ct_release(struct nf_conn *ct) __ksym; static __always_inline void swap_eth_addr(__u8 *a, __u8 *b) @@ -382,7 +391,7 @@ static __always_inline int tcp_dissect(void *data, void *data_end, return XDP_TX; } -static __always_inline int tcp_lookup(struct xdp_md *ctx, struct header_pointers *hdr) +static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp) { struct bpf_ct_opts ct_lookup_opts = { .netns_id = BPF_F_CURRENT_NETNS, @@ -416,7 +425,10 @@ static __always_inline int tcp_lookup(struct xdp_md *ctx, struct header_pointers */ return XDP_ABORTED; } - ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + if (xdp) + ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + else + ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); if (ct) { unsigned long status = ct->status; @@ -529,8 +541,9 @@ static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr, } static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, - struct xdp_md *ctx, - void *data, void *data_end) + void *ctx, + void *data, void *data_end, + bool xdp) { __u32 old_pkt_size, new_pkt_size; /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the @@ -666,8 +679,13 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, /* Set the new packet size. 
*/ old_pkt_size = data_end - data; new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; - if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) - return XDP_ABORTED; + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) + return XDP_ABORTED; + } else { + if (bpf_skb_change_tail(ctx, new_pkt_size, 0)) + return XDP_ABORTED; + } values_inc_synacks(); @@ -693,71 +711,123 @@ static __always_inline int syncookie_handle_ack(struct header_pointers *hdr) return XDP_PASS; } -SEC("xdp") -int syncookie_xdp(struct xdp_md *ctx) +static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; - struct header_pointers hdr; - __s64 value; - int ret; - struct bpf_ct_opts ct_lookup_opts = { .netns_id = BPF_F_CURRENT_NETNS, .l4proto = IPPROTO_TCP, }; + int ret; - ret = tcp_dissect(data, data_end, &hdr); + ret = tcp_dissect(data, data_end, hdr); if (ret != XDP_TX) return ret; - ret = tcp_lookup(ctx, &hdr); + ret = tcp_lookup(ctx, hdr, xdp); if (ret != XDP_TX) return ret; /* Packet is TCP and doesn't belong to an established connection. */ - if ((hdr.tcp->syn ^ hdr.tcp->ack) != 1) + if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1) return XDP_DROP; - /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr.tcp_len + /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. 
*/ - if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr.tcp_len)) - return XDP_ABORTED; + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len)) + return XDP_ABORTED; + } else { + /* Without volatile the verifier throws this error: + * R9 32-bit pointer arithmetic prohibited + */ + volatile u64 old_len = data_end - data; - data_end = (void *)(long)ctx->data_end; - data = (void *)(long)ctx->data; + if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0)) + return XDP_ABORTED; + } + + return XDP_TX; +} - if (hdr.ipv4) { - hdr.eth = data; - hdr.ipv4 = (void *)hdr.eth + sizeof(*hdr.eth); +static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + if (hdr->ipv4) { + hdr->eth = data; + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); /* IPV4_MAXLEN is needed when calculating checksum. * At least sizeof(struct iphdr) is needed here to access ihl. */ - if ((void *)hdr.ipv4 + IPV4_MAXLEN > data_end) + if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end) return XDP_ABORTED; - hdr.tcp = (void *)hdr.ipv4 + hdr.ipv4->ihl * 4; - } else if (hdr.ipv6) { - hdr.eth = data; - hdr.ipv6 = (void *)hdr.eth + sizeof(*hdr.eth); - hdr.tcp = (void *)hdr.ipv6 + sizeof(*hdr.ipv6); + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + } else if (hdr->ipv6) { + hdr->eth = data; + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); } else { return XDP_ABORTED; } - if ((void *)hdr.tcp + TCP_MAXLEN > data_end) + if ((void *)hdr->tcp + TCP_MAXLEN > data_end) return XDP_ABORTED; /* We run out of registers, tcp_len gets spilled to the stack, and the * verifier forgets its min and max values checked above in tcp_dissect. */ - hdr.tcp_len = hdr.tcp->doff * 4; - if (hdr.tcp_len < sizeof(*hdr.tcp)) + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) return XDP_ABORTED; - return hdr.tcp->syn ? 
syncookie_handle_syn(&hdr, ctx, data, data_end) : - syncookie_handle_ack(&hdr); + return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) : + syncookie_handle_ack(hdr); +} + +SEC("xdp") +int syncookie_xdp(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(ctx, data, data_end, &hdr, true); + if (ret != XDP_TX) + return ret; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + return syncookie_part2(ctx, data, data_end, &hdr, true); +} + +SEC("tc") +int syncookie_tc(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(skb, data, data_end, &hdr, false); + if (ret != XDP_TX) + return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT; + + data_end = (void *)(long)skb->data_end; + data = (void *)(long)skb->data; + + ret = syncookie_part2(skb, data, data_end, &hdr, false); + switch (ret) { + case XDP_PASS: + return TC_ACT_OK; + case XDP_TX: + return bpf_redirect(skb->ifindex, 0); + default: + return TC_ACT_SHOT; + } } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c index 4653d4655b5f..d874ddfb39c4 100644 --- a/tools/testing/selftests/bpf/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -18,16 +18,31 @@ static unsigned int ifindex; static __u32 attached_prog_id; +static bool attached_tc; static void noreturn cleanup(int sig) { - DECLARE_LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); int prog_fd; int err; if (attached_prog_id == 0) exit(0); + if (attached_tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + + err = bpf_tc_hook_destroy(&hook); + if (err < 0) { + fprintf(stderr, "Error: 
bpf_tc_hook_destroy: %s\n", strerror(-err)); + fprintf(stderr, "Failed to destroy the TC hook\n"); + exit(1); + } + exit(0); + } + prog_fd = bpf_prog_get_fd_by_id(attached_prog_id); if (prog_fd < 0) { fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); @@ -55,7 +70,7 @@ static void noreturn cleanup(int sig) static noreturn void usage(const char *progname) { - fprintf(stderr, "Usage: %s [--iface |--prog ] [--mss4 --mss6 --wscale --ttl ] [--ports ,,...] [--single]\n", + fprintf(stderr, "Usage: %s [--iface |--prog ] [--mss4 --mss6 --wscale --ttl ] [--ports ,,...] [--single] [--tc]\n", progname); exit(1); } @@ -74,7 +89,7 @@ static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigne } static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id, - __u64 *tcpipopts, char **ports, bool *single) + __u64 *tcpipopts, char **ports, bool *single, bool *tc) { static struct option long_options[] = { { "help", no_argument, NULL, 'h' }, @@ -86,6 +101,7 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * { "ttl", required_argument, NULL, 't' }, { "ports", required_argument, NULL, 'p' }, { "single", no_argument, NULL, 's' }, + { "tc", no_argument, NULL, 'c' }, { NULL, 0, NULL, 0 }, }; unsigned long mss4, mss6, wscale, ttl; @@ -143,6 +159,9 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * case 's': *single = true; break; + case 'c': + *tc = true; + break; default: usage(argv[0]); } @@ -164,7 +183,7 @@ static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 * usage(argv[0]); } -static int syncookie_attach(const char *argv0, unsigned int ifindex) +static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc) { struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); @@ -188,9 +207,9 @@ static int syncookie_attach(const char *argv0, unsigned int ifindex) return err; } - prog = 
bpf_object__find_program_by_name(obj, "syncookie_xdp"); + prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp"); if (!prog) { - fprintf(stderr, "Error: bpf_object__find_program_by_name: program syncookie_xdp was not found\n"); + fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n"); return -ENOENT; } @@ -201,21 +220,50 @@ static int syncookie_attach(const char *argv0, unsigned int ifindex) fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); goto out; } + attached_tc = tc; attached_prog_id = info.id; signal(SIGINT, cleanup); signal(SIGTERM, cleanup); - err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_UPDATE_IF_NOEXIST, NULL); - if (err < 0) { - fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err)); - signal(SIGINT, SIG_DFL); - signal(SIGTERM, SIG_DFL); - attached_prog_id = 0; - goto out; + if (tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, opts, + .handle = 1, + .priority = 1, + .prog_fd = prog_fd); + + err = bpf_tc_hook_create(&hook); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_hook_create: %s\n", + strerror(-err)); + goto fail; + } + err = bpf_tc_attach(&hook, &opts); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_attach: %s\n", + strerror(-err)); + goto fail; + } + + } else { + err = bpf_xdp_attach(ifindex, prog_fd, + XDP_FLAGS_UPDATE_IF_NOEXIST, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", + strerror(-err)); + goto fail; + } } err = 0; out: bpf_object__close(obj); return err; +fail: + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + attached_prog_id = 0; + goto out; } static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd) @@ -248,11 +296,6 @@ static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports goto out; } - if (prog_info.type != BPF_PROG_TYPE_XDP) { - fprintf(stderr, "Error: BPF prog 
type is not BPF_PROG_TYPE_XDP\n"); - err = -ENOENT; - goto out; - } if (prog_info.nr_map_ids < 2) { fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n", prog_info.nr_map_ids); @@ -319,17 +362,22 @@ int main(int argc, char *argv[]) char *ports; bool single; int err = 0; + bool tc; - parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports, &single); + parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports, + &single, &tc); if (prog_id == 0) { - err = bpf_xdp_query_id(ifindex, 0, &prog_id); - if (err < 0) { - fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n", strerror(-err)); - goto out; + if (!tc) { + err = bpf_xdp_query_id(ifindex, 0, &prog_id); + if (err < 0) { + fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n", + strerror(-err)); + goto out; + } } if (prog_id == 0) { - err = syncookie_attach(argv[0], ifindex); + err = syncookie_attach(argv[0], ifindex, tc); if (err < 0) goto out; prog_id = attached_prog_id; -- cgit v1.2.3-59-g8ed1b From 08c79c9cd67fffd0d5538ddbd3a97b0a865b5eb5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 16 Jun 2022 21:55:12 -0700 Subject: selftests/bpf: Don't force lld on non-x86 architectures LLVM's lld linker doesn't have a universal architecture support (e.g., it definitely doesn't work on s390x), so be safe and force lld for urandom_read and liburandom_read.so only on x86 architectures. This should fix s390x CI runs. 
Fixes: 3e6fe5ce4d48 ("libbpf: Fix internal USDT address translation logic for shared libraries") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220617045512.1339795-1-andrii@kernel.org --- tools/testing/selftests/bpf/Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 8b30bb743e24..cb8e552e1418 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -168,18 +168,25 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# LLVM's ld.lld doesn't support all the architectures, so use it only on x86 +ifeq ($(SRCARCH),x86) +LLD := lld +else +LLD := ld +endif + # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ - -fuse-ld=lld -Wl,-znoseparate-code -fPIC -shared -o $@ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ liburandom_read.so $(LDLIBS) \ - -fuse-ld=lld -Wl,-znoseparate-code \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code \ -Wl,-rpath=. 
-Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) -- cgit v1.2.3-59-g8ed1b From e386a527fc0893dc905464890eddfeb2cbeb44f4 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 16 Jun 2022 13:42:38 +0300 Subject: selftests: mirror_gre_bridge_1q_lag: Enslave port to bridge before other configurations With the mlxsw driver, configurations are offloaded only when a physical port is enslaved to the virtual device (e.g., to a bridge). In the 'mirror_gre_bridge_1q_lag' test, the bridge gets an address and a route before there are any ports in the bridge, which means that these configurations are not offloaded. Until now the test passed with the mlxsw driver even though the RIF of the bridge is not in the hardware, because the ARP packets are trapped in layer 2 and also mirrored, so there is no real need for the RIF in hardware. The previous patch changed the traps 'ARP_REQUEST' and 'ARP_RESPONSE' to be done at layer 3 instead of layer 2. With this change the ARP packets are not trapped during the test, as the RIF is not in the hardware because of the order of configurations. Reorder the configurations so that they are offloaded; the test will then pass with the changed traps. Signed-off-by: Amit Cohen Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S.
Miller --- tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index 28d568c48a73..91e431cd919e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -141,12 +141,13 @@ switch_create() ip link set dev $swp4 up ip link add name br1 type bridge vlan_filtering 1 - ip link set dev br1 up - __addr_add_del br1 add 192.0.2.129/32 - ip -4 route add 192.0.2.130/32 dev br1 team_create lag loadbalance $swp3 $swp4 ip link set dev lag master br1 + + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.129/32 + ip -4 route add 192.0.2.130/32 dev br1 } switch_destroy() -- cgit v1.2.3-59-g8ed1b From d3ffeb2dba633b99ef2602019f61a97e0163a756 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Jun 2022 13:42:39 +0300 Subject: selftests: mlxsw: resource_scale: Update scale target after test setup The scale of each resource is tested in the following manner: 1. The scale target is queried. 2. The test setup is prepared. 3. The test is invoked. In some cases, the occupancy of a resource changes as part of the second step, requiring the test to return a scale target that takes this change into account. Make this more robust by re-querying the scale target after the second step. Another possible solution is to swap the first and second steps, but when a test needs to be skipped (i.e., scale target is zero), the setup would have been in vain. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. 
Miller --- tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 3 +++ tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 3 +++ 2 files changed, 6 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index e9f65bd2e299..22f761442bad 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -38,6 +38,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do target=$(${current_test}_get_target "$should_fail") ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource changed + # following the test setup. + target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" ${current_test}_cleanup devlink_reload diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index dea33dc93790..12201acc00b9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -43,6 +43,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do target=$(${current_test}_get_target "$should_fail") ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource + # changed following the test setup. 
+ target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" ${current_test}_cleanup if [[ "$should_fail" -eq 0 ]]; then -- cgit v1.2.3-59-g8ed1b From 3128b9f51ee7ec7d091496379247489aab3007bb Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 16 Jun 2022 13:42:40 +0300 Subject: selftests: mlxsw: resource_scale: Introduce traffic tests The scale tests are currently testing two things: that some number of instances of a given resource can actually be created; and that when an attempt is made to create more than the supported amount, the failures are noted and handled gracefully. However the ability to allocate the resource does not mean that the resource actually works when passing traffic. For that, make it possible for a given scale to also test traffic. Traffic test is only run on the positive leg of the scale test (no point trying to pass traffic when the expected outcome is that the resource will not be allocated). Traffic tests are opt-in, if a given test does not expose it, it is not run. To this end, delay the test cleanup until after the traffic test is run. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 12 ++++++++++-- .../selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 11 ++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 22f761442bad..6d7814ba3c03 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -42,13 +42,21 @@ for current_test in ${TESTS:-$ALL_TESTS}; do # following the test setup. 
target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup - devlink_reload if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]]; then + $tt "$target" + log_test "'$current_test' $target traffic test" + fi + fi else log_test "'$current_test' overflow $target" fi + ${current_test}_cleanup + devlink_reload RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 12201acc00b9..a1bc93b966ae 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -47,12 +47,21 @@ for current_test in ${TESTS:-$ALL_TESTS}; do # changed following the test setup. target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' [$profile] $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]] + then + $tt "$target" + log_test "'$current_test' [$profile] $target traffic test" + fi + fi else log_test "'$current_test' [$profile] overflow $target" fi + ${current_test}_cleanup RET_FIN=$(( RET_FIN || RET )) done done -- cgit v1.2.3-59-g8ed1b From 8cad339db339a39cb82b1188e4be4070a433abac Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 16 Jun 2022 13:42:41 +0300 Subject: selftests: mlxsw: resource_scale: Allow skipping a test The scale tests are currently testing two things: that some number of instances of a given resource can actually be created; and that when an attempt is made to create more than the supported amount, the failures are noted and handled gracefully. 
Sometimes the scale test depends on more than one resource. In particular, a following patch will add a RIF counter scale test, which depends on the number of RIF counters that can be bound, and also on the number of RIFs that can be created. When the test is limited by the auxiliary resource and not by the primary one, there's no point trying to run the overflow test, because it would be testing exhaustion of the wrong resource. To support this use case, when the $test_get_target yields 0, skip the test instead. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 5 +++++ tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 6d7814ba3c03..afe17b108b46 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -36,6 +36,11 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' should_fail=$should_fail test" + continue + fi + ${current_test}_setup_prepare setup_wait $num_netifs # Update target in case occupancy of a certain resource changed diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index a1bc93b966ae..c0da22cd7d20 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -41,6 +41,10 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for 
should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' [$profile] should_fail=$should_fail test" + continue + fi ${current_test}_setup_prepare setup_wait $num_netifs # Update target in case occupancy of a certain resource -- cgit v1.2.3-59-g8ed1b From 35d5829e86c29892136ca96bd4d809d4429f1510 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 16 Jun 2022 13:42:42 +0300 Subject: selftests: mlxsw: resource_scale: Pass target count to cleanup The scale tests are verifying behavior of mlxsw when number of instances of some resource reaches the ASIC capacity. The number of instances is referred to as "target" number. No scale tests so far needed to know this target number to clean up. E.g. the tc_flower simply removes the clsact qdisc that all the tested filters are hooked onto, and that takes care of collecting all the filters. However, for the RIF counter test, which is being added in a future patch, VLAN netdevices are created. These are created as part of the test, but of course the cleanup needs to undo them again. For that it needs to know how many there were. To support this usage, pass the target number to the cleanup callback. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh | 2 +- tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index afe17b108b46..1a7a472edfd0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -60,7 +60,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do else log_test "'$current_test' overflow $target" fi - ${current_test}_cleanup + ${current_test}_cleanup $target devlink_reload RET_FIN=$(( RET_FIN || RET )) done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index c0da22cd7d20..70c9da8fe303 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -65,7 +65,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do else log_test "'$current_test' [$profile] overflow $target" fi - ${current_test}_cleanup + ${current_test}_cleanup $target RET_FIN=$(( RET_FIN || RET )) done done -- cgit v1.2.3-59-g8ed1b From dd5d20e17c960dc5c8b8c585dfae79cf39660867 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 16 Jun 2022 13:42:43 +0300 Subject: selftests: mlxsw: tc_flower_scale: Add a traffic test Add a test that checks that the created filters do actually trigger on matching traffic. Exercising all the rules would be a very lengthy process. Instead, take a log2 subset of rules. The logic behind picking log2 rules is that then every bit of the instantiated item's number is exercised. This should catch issues whether they happen at the high end, low end, or somewhere in between. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/tc_flower_scale.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index aa74be9f47c8..d3d9e60d6ddf 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -77,6 +77,7 @@ tc_flower_rules_create() filter add dev $h2 ingress \ prot ipv6 \ pref 1000 \ + handle 42$i \ flower $tcflags dst_ip $(tc_flower_addr $i) \ action drop EOF @@ -121,3 +122,19 @@ tc_flower_test() tcflags="skip_sw" __tc_flower_test $count $should_fail } + +tc_flower_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count - 1; i > 0; i /= 2)); do + $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count - 1; i > 0; i /= 2)); do + tc_check_packets "dev $h2 ingress" 42$i 1 + check_err $? "Traffic not seen at rule #$i" + done +} -- cgit v1.2.3-59-g8ed1b From be00853bfd2e704893916bc349e7ab1d50615cb4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 16 Jun 2022 13:42:44 +0300 Subject: selftests: mlxsw: Add a RIF counter scale test This tests creates as many RIFs as possible, ideally more than there can be RIF counters (though that is currently only possible on Spectrum-1). It then tries to enable L3 HW stats on each of the RIFs. It also contains the traffic test, which tries to run traffic through a log2 of those counters and checks that the traffic is shown in the counter values. Like with tc_flower traffic test, take a log2 subset of rules. The logic behind picking log2 rules is that then every bit of the instantiated item's number is exercised. 
This should catch issues whether they happen at the high end, low end, or somewhere in between. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/rif_counter_scale.sh | 107 +++++++++++++++++++++ .../drivers/net/mlxsw/spectrum-2/resource_scale.sh | 11 ++- .../net/mlxsw/spectrum-2/rif_counter_scale.sh | 1 + .../drivers/net/mlxsw/spectrum/resource_scale.sh | 11 ++- .../net/mlxsw/spectrum/rif_counter_scale.sh | 34 +++++++ 5 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh create mode 120000 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh create mode 100644 tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh new file mode 100644 index 000000000000..a43a9926e690 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0 + +RIF_COUNTER_NUM_NETIFS=2 + +rif_counter_addr4() +{ + local i=$1; shift + local p=$1; shift + + printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p)) +} + +rif_counter_addr4pfx() +{ + rif_counter_addr4 $@ + printf /30 +} + +rif_counter_h1_create() +{ + simple_if_init $h1 +} + +rif_counter_h1_destroy() +{ + simple_if_fini $h1 +} + +rif_counter_h2_create() +{ + simple_if_init $h2 +} + +rif_counter_h2_destroy() +{ + simple_if_fini $h2 +} + +rif_counter_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + rif_counter_h1_create + rif_counter_h2_create +} + +rif_counter_cleanup() +{ + local count=$1; shift + + pre_cleanup + + for ((i = 1; i <= count; i++)); do + vlan_destroy $h2 $i + done + + rif_counter_h2_destroy + rif_counter_h1_destroy + + vrf_cleanup + + if [[ -v 
RIF_COUNTER_BATCH_FILE ]]; then + rm -f $RIF_COUNTER_BATCH_FILE + fi +} + + +rif_counter_test() +{ + local count=$1; shift + local should_fail=$1; shift + + RIF_COUNTER_BATCH_FILE="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2) + done + for ((i = 1; i <= count; i++)); do + cat >> $RIF_COUNTER_BATCH_FILE <<-EOF + stats set dev $h2.$i l3_stats on + EOF + done + + ip -b $RIF_COUNTER_BATCH_FILE + check_err_fail $should_fail $? "RIF counter enablement" +} + +rif_counter_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count; i > 0; i /= 2)); do + $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(rif_counter_addr4 $i 1) \ + -B $(rif_counter_addr4 $i 2) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count; i > 0; i /= 2)); do + busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \ + hw_stats_get l3_stats $h2.$i rx packets > /dev/null + check_err $? "Traffic not seen at RIF $h2.$i" + done +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 1a7a472edfd0..688338bbeb97 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -25,7 +25,16 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh new file mode 120000 index 000000000000..1f5752e8ffc0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh @@ -0,0 +1 @@ 
+../spectrum/rif_counter_scale.sh \ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 70c9da8fe303..95d9f710a630 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,16 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh new file mode 100644 index 000000000000..d44536276e8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_counter_scale.sh + +rif_counter_get_target() +{ + local should_fail=$1; shift + local max_cnts + local max_rifs + local target + + max_rifs=$(devlink_resource_size_get rifs) + max_cnts=$(devlink_resource_size_get counters rif) + + # Remove already allocated RIFs. + ((max_rifs -= $(devlink_resource_occ_get rifs))) + + # 10 KVD slots per counter, ingress+egress counters per RIF + ((max_cnts /= 20)) + + # Pointless to run the overflow test if we don't have enough RIFs to + # host all the counters. + if ((max_cnts > max_rifs && should_fail)); then + echo 0 + return + fi + + target=$((max_rifs < max_cnts ? max_rifs : max_cnts)) + + if ((! 
should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} -- cgit v1.2.3-59-g8ed1b From ed62af45467a6786cbdeef42a7b4e7ced374f593 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 16 Jun 2022 13:42:45 +0300 Subject: selftests: spectrum-2: tc_flower_scale: Dynamically set scale target Instead of hard coding the scale target in the test, dynamically set it based on the maximum number of flow counters and their current occupancy. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index efd798a85931..4444bbace1a9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -4,17 +4,22 @@ source ../tc_flower_scale.sh tc_flower_get_target() { local should_fail=$1; shift + local max_cnts # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 30K (30,720) flow counters and six of - # these are used for multicast routing. - local target=30714 + max_cnts=$(devlink_resource_size_get counters flow) + + # Remove already allocated counters. + ((max_cnts -= $(devlink_resource_occ_get counters flow))) + + # Each rule uses two counters, for packets and bytes. + ((max_cnts /= 2)) if ((! 
should_fail)); then - echo $target + echo $max_cnts else - echo $((target + 1)) + echo $((max_cnts + 1)) fi } -- cgit v1.2.3-59-g8ed1b From e068c0776b0bbd3fc5a283b0e0eaa1cbb9ef0e3d Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 20 Jun 2022 13:49:39 +0300 Subject: selftests/bpf: Enable config options needed for xdp_synproxy test This commit adds the kernel config options needed to run the recently added xdp_synproxy test. Users without these options will hit errors like this: test_synproxy:FAIL:iptables -t raw -I PREROUTING -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack unexpected error: 256 (errno 22) Suggested-by: Alexei Starovoitov Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220620104939.4094104-1-maximmi@nvidia.com --- tools/testing/selftests/bpf/config | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3b3edc0fc8a6..c05904d631ec 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -57,3 +57,9 @@ CONFIG_FPROBE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_MPTCP=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IP_NF_RAW=y -- cgit v1.2.3-59-g8ed1b From 933ff53191eb7a8492370bad6339a1ca6da2d939 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Jun 2022 02:53:40 +0300 Subject: selftests/bpf: specify expected instructions in test_verifier tests Allows to specify expected and unexpected instruction sequences in test_verifier test cases. The instructions are requested from kernel after BPF program loading, thus allowing to check some of the transformations applied by BPF verifier. 
- `expected_insns` field specifies a sequence of instructions expected to be found in the program; - `unexpected_insns` field specifies a sequence of instructions that are not expected to be found in the program; - `INSN_OFF_MASK` and `INSN_IMM_MASK` values can be used to mask `off` and `imm` fields. - `SKIP_INSNS` can be used to specify that some instructions in the (un)expected pattern are not important (behavior similar to usage of `\t` in `errstr` field). The intended usage is as follows: { "inline simple bpf_loop call", .insns = { /* main */ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), ... BPF_EXIT_INSN(), /* callback */ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), BPF_EXIT_INSN(), }, .expected_insns = { BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), SKIP_INSNS(), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 8, 1) }, .unexpected_insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, .result = ACCEPT, .runs = 0, }, Here it is expected that the move of 1 to register 1 would remain in place and the helper function call instruction would be replaced by a relative call instruction.
Signed-off-by: Eduard Zingerman Acked-by: Song Liu Link: https://lore.kernel.org/r/20220620235344.569325-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 234 ++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 372579c9f45e..1f24eae9e16e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -51,6 +51,8 @@ #endif #define MAX_INSNS BPF_MAXINSNS +#define MAX_EXPECTED_INSNS 32 +#define MAX_UNEXPECTED_INSNS 32 #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 23 @@ -58,6 +60,10 @@ #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#define INSN_OFF_MASK ((__s16)0xFFFF) +#define INSN_IMM_MASK ((__s32)0xFFFFFFFF) +#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) + #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -79,6 +85,23 @@ struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; struct bpf_insn *fill_insns; + /* If specified, test engine looks for this sequence of + * instructions in the BPF program after loading. Allows to + * test rewrites applied by verifier. Use values + * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm` + * fields if content does not matter. The test case fails if + * specified instructions are not found. + * + * The sequence could be split into sub-sequences by adding + * SKIP_INSNS instruction at the end of each sub-sequence. In + * such case sub-sequences are searched for one after another. + */ + struct bpf_insn expected_insns[MAX_EXPECTED_INSNS]; + /* If specified, test engine applies same pattern matching + * logic as for `expected_insns`. If the specified pattern is + * matched test case is marked as failed. 
+ */ + struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS]; int fixup_map_hash_8b[MAX_FIXUPS]; int fixup_map_hash_48b[MAX_FIXUPS]; int fixup_map_hash_16b[MAX_FIXUPS]; @@ -1126,6 +1149,214 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_obj_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)*buf; + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_obj_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static bool is_null_insn(struct bpf_insn *insn) +{ + struct bpf_insn null_insn = {}; + + return memcmp(insn, &null_insn, sizeof(null_insn)) == 0; +} + +static bool is_skip_insn(struct bpf_insn *insn) +{ + struct bpf_insn skip_insn = SKIP_INSNS(); + + return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0; +} + +static int null_terminated_insn_len(struct bpf_insn *seq, int max_len) +{ + int i; + + for (i = 0; i < max_len; ++i) { + if (is_null_insn(&seq[i])) + return i; + } + return max_len; +} + +static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked) +{ + struct bpf_insn orig_masked; + + memcpy(&orig_masked, orig, sizeof(orig_masked)); + if (masked->imm == INSN_IMM_MASK) 
+ orig_masked.imm = INSN_IMM_MASK; + if (masked->off == INSN_OFF_MASK) + orig_masked.off = INSN_OFF_MASK; + + return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0; +} + +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq, + int seq_len, int subseq_len) +{ + int i, j; + + if (subseq_len > seq_len) + return -1; + + for (i = 0; i < seq_len - subseq_len + 1; ++i) { + bool found = true; + + for (j = 0; j < subseq_len; ++j) { + if (!compare_masked_insn(&seq[i + j], &subseq[j])) { + found = false; + break; + } + } + if (found) + return i; + } + + return -1; +} + +static int find_skip_insn_marker(struct bpf_insn *seq, int len) +{ + int i; + + for (i = 0; i < len; ++i) + if (is_skip_insn(&seq[i])) + return i; + + return -1; +} + +/* Return true if all sub-sequences in `subseqs` could be found in + * `seq` one after another. Sub-sequences are separated by a single + * SKIP_INSNS() marker; the list ends at the first nil instruction. + */ +static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs, + int seq_len, int max_subseqs_len) +{ + int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len); + + while (subseqs_len > 0) { + int skip_idx = find_skip_insn_marker(subseqs, subseqs_len); + int cur_subseq_len = skip_idx < 0 ? 
subseqs_len : skip_idx; + int subseq_idx = find_insn_subseq(seq, subseqs, + seq_len, cur_subseq_len); + + if (subseq_idx < 0) + return false; + seq += subseq_idx + cur_subseq_len; + seq_len -= subseq_idx + cur_subseq_len; + subseqs += cur_subseq_len + 1; + subseqs_len -= cur_subseq_len + 1; + } + + return true; +} + +static void print_insn(struct bpf_insn *buf, int cnt) +{ + int i; + + printf(" addr op d s off imm\n"); + for (i = 0; i < cnt; ++i) { + struct bpf_insn *insn = &buf[i]; + + if (is_null_insn(insn)) + break; + + if (is_skip_insn(insn)) + printf(" ...\n"); + else + printf(" %04x: %02x %1x %x %04hx %08x\n", + i, insn->code, insn->dst_reg, + insn->src_reg, insn->off, insn->imm); + } +} + +static bool check_xlated_program(struct bpf_test *test, int fd_prog) +{ + struct bpf_insn *buf; + int cnt; + bool result = true; + bool check_expected = !is_null_insn(test->expected_insns); + bool check_unexpected = !is_null_insn(test->unexpected_insns); + + if (!check_expected && !check_unexpected) + goto out; + + if (get_xlated_program(fd_prog, &buf, &cnt)) { + printf("FAIL: can't get xlated program\n"); + result = false; + goto out; + } + + if (check_expected && + !find_all_insn_subseqs(buf, test->expected_insns, + cnt, MAX_EXPECTED_INSNS)) { + printf("FAIL: can't find expected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Expected subsequence:\n"); + print_insn(test->expected_insns, MAX_EXPECTED_INSNS); + } + } + + if (check_unexpected && + find_all_insn_subseqs(buf, test->unexpected_insns, + cnt, MAX_UNEXPECTED_INSNS)) { + printf("FAIL: found unexpected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Un-expected subsequence:\n"); + print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS); + } + } + + free(buf); + out: + return result; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int 
*passes, int *errors) { @@ -1262,6 +1493,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (verbose) printf(", verifier log:\n%s", bpf_vlog); + if (!check_xlated_program(test, fd_prog)) + goto fail_log; + run_errs = 0; run_successes = 0; if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { -- cgit v1.2.3-59-g8ed1b From 7a42008ca5c700819e4b3003025e5e1695fd1f86 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Jun 2022 02:53:41 +0300 Subject: selftests/bpf: allow BTF specs and func infos in test_verifier tests The BTF and func_info specification for test_verifier tests follows the same notation as in prog_tests/btf.c tests. E.g.: ... .func_info = { { 0, 6 }, { 8, 7 } }, .func_info_cnt = 2, .btf_strings = "\0int\0", .btf_types = { BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), BTF_PTR_ENC(1), }, ... The BTF specification is loaded only when specified. Signed-off-by: Eduard Zingerman Acked-by: Song Liu Link: https://lore.kernel.org/r/20220620235344.569325-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 1 - tools/testing/selftests/bpf/test_btf.h | 2 + tools/testing/selftests/bpf/test_verifier.c | 94 +++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 18 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index edb387163baa..1fd792a92a1c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -34,7 +34,6 @@ static bool always_log; #undef CHECK #define CHECK(condition, format...) 
_CHECK(condition, "check", duration, format) -#define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f #define NAME_NTH(N) (0xfffe0000 | N) diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 38782bd47fdc..fb4f4714eeb4 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -4,6 +4,8 @@ #ifndef _TEST_BTF_H #define _TEST_BTF_H +#define BTF_END_RAW 0xdeadbeef + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1f24eae9e16e..7fe897c66d81 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -59,11 +59,17 @@ #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#define MAX_FUNC_INFOS 8 +#define MAX_BTF_STRINGS 256 +#define MAX_BTF_TYPES 256 #define INSN_OFF_MASK ((__s16)0xFFFF) #define INSN_IMM_MASK ((__s32)0xFFFFFFFF) #define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) +#define DEFAULT_LIBBPF_LOG_LEVEL 4 +#define VERBOSE_LIBBPF_LOG_LEVEL 1 + #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -158,6 +164,14 @@ struct bpf_test { }; enum bpf_attach_type expected_attach_type; const char *kfunc; + struct bpf_func_info func_info[MAX_FUNC_INFOS]; + int func_info_cnt; + char btf_strings[MAX_BTF_STRINGS]; + /* A set of BTF types to load when specified, + * use macro definitions from test_btf.h, + * must end with BTF_END_RAW + */ + __u32 btf_types[MAX_BTF_TYPES]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -687,34 +701,66 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ }; -static int load_btf(void) +static char bpf_vlog[UINT_MAX >> 8]; + +static int load_btf_spec(__u32 
*types, int types_len, + const char *strings, int strings_len) { struct btf_header hdr = { .magic = BTF_MAGIC, .version = BTF_VERSION, .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(btf_raw_types), - .str_off = sizeof(btf_raw_types), - .str_len = sizeof(btf_str_sec), + .type_len = types_len, + .str_off = types_len, + .str_len = strings_len, }; void *ptr, *raw_btf; int btf_fd; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .log_buf = bpf_vlog, + .log_size = sizeof(bpf_vlog), + .log_level = (verbose + ? VERBOSE_LIBBPF_LOG_LEVEL + : DEFAULT_LIBBPF_LOG_LEVEL), + ); - ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) + - sizeof(btf_str_sec)); + raw_btf = malloc(sizeof(hdr) + types_len + strings_len); + ptr = raw_btf; memcpy(ptr, &hdr, sizeof(hdr)); ptr += sizeof(hdr); - memcpy(ptr, btf_raw_types, hdr.type_len); + memcpy(ptr, types, hdr.type_len); ptr += hdr.type_len; - memcpy(ptr, btf_str_sec, hdr.str_len); + memcpy(ptr, strings, hdr.str_len); ptr += hdr.str_len; - btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL); - free(raw_btf); + btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts); if (btf_fd < 0) - return -1; - return btf_fd; + printf("Failed to load BTF spec: '%s'\n", strerror(errno)); + + free(raw_btf); + + return btf_fd < 0 ? 
-1 : btf_fd; +} + +static int load_btf(void) +{ + return load_btf_spec(btf_raw_types, sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); +} + +static int load_btf_for_test(struct bpf_test *test) +{ + int types_num = 0; + + while (types_num < MAX_BTF_TYPES && + test->btf_types[types_num] != BTF_END_RAW) + ++types_num; + + int types_len = types_num * sizeof(test->btf_types[0]); + + return load_btf_spec(test->btf_types, types_len, + test->btf_strings, sizeof(test->btf_strings)); } static int create_map_spin_lock(void) @@ -793,8 +839,6 @@ static int create_map_kptr(void) return fd; } -static char bpf_vlog[UINT_MAX >> 8]; - static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { @@ -1360,7 +1404,7 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog) static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, alignment_prevented_execution; + int fd_prog, btf_fd, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; LIBBPF_OPTS(bpf_prog_load_opts, opts); @@ -1372,8 +1416,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u32 pflags; int i, err; + fd_prog = -1; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; + btf_fd = -1; if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; @@ -1406,11 +1452,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = 1; + opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else - opts.log_level = 4; + opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { @@ -1428,6 +1474,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.attach_btf_id = attach_btf_id; } + 
if (test->btf_types[0] != 0) { + btf_fd = load_btf_for_test(test); + if (btf_fd < 0) + goto fail_log; + opts.prog_btf_fd = btf_fd; + } + + if (test->func_info_cnt != 0) { + opts.func_info = test->func_info; + opts.func_info_cnt = test->func_info_cnt; + opts.func_info_rec_size = sizeof(test->func_info[0]); + } + opts.log_buf = bpf_vlog; opts.log_size = sizeof(bpf_vlog); fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); @@ -1539,6 +1598,7 @@ close_fds: if (test->fill_insns) free(test->fill_insns); close(fd_prog); + close(btf_fd); for (i = 0; i < MAX_NR_MAPS; i++) close(map_fds[i]); sched_yield(); -- cgit v1.2.3-59-g8ed1b From f8acfdd04410d26b096a7082444cdc402df10f89 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Jun 2022 02:53:43 +0300 Subject: selftests/bpf: BPF test_verifier selftests for bpf_loop inlining A number of test cases for BPF selftests test_verifier to check how bpf_loop inline transformation rewrites the BPF program. The following cases are covered: - happy path - no-rewrite when flags is non-zero - no-rewrite when callback is non-constant - subprogno in insn_aux is updated correctly when dead sub-programs are removed - check that correct stack offsets are assigned for spilling of R6-R8 registers Signed-off-by: Eduard Zingerman Acked-by: Song Liu Link: https://lore.kernel.org/r/20220620235344.569325-5-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/verifier/bpf_loop_inline.c | 252 +++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 tools/testing/selftests/bpf/verifier/bpf_loop_inline.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c new file mode 100644 index 000000000000..232da07c93b5 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -0,0 +1,252 @@ +#define BTF_TYPES \ + .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \ + .btf_types = 
{ \ + /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \ + /* 2: int* */ BTF_PTR_ENC(1), \ + /* 3: void* */ BTF_PTR_ENC(0), \ + /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 3), \ + /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \ + BTF_FUNC_PROTO_ARG_ENC(5, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 2), \ + /* 6: main */ BTF_FUNC_ENC(20, 4), \ + /* 7: callback */ BTF_FUNC_ENC(11, 5), \ + BTF_END_RAW \ + } + +#define MAIN_TYPE 6 +#define CALLBACK_TYPE 7 + +/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF + * fields for pseudo calls + */ +#define PSEUDO_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \ + INSN_OFF_MASK, INSN_IMM_MASK) + +/* can't use BPF_FUNC_loop constant, + * do_misc_fixups adjusts the IMM field + */ +#define HELPER_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK) + +{ + "inline simple bpf_loop call", + .insns = { + /* main */ + /* force verifier state branching to verify logic on first and + * subsequent bpf_loop insn processing steps + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, flags 
non-zero", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -10), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, callback non-constant", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */ + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), 
+ BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 14, CALLBACK_TYPE }, + { 16, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline and a dead func", + .insns = { + /* main */ + + /* A reference to callback #1 to make verifier count it as a func. + * This reference is overwritten below and callback #1 is dead. + */ + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 10, CALLBACK_TYPE }, + { 12, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline stack locations for loop vars", + .insns = { + /* main */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + /* bpf_loop call #1 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* bpf_loop call #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, 
BPF_PSEUDO_FUNC, 0, 16), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* call func and exit */ + BPF_CALL_REL(2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* func */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + SKIP_INSNS(), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + /* offsets are the same as in the first call */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + SKIP_INSNS(), + /* offsets differ from main because of different offset + * in BPF_ST_MEM instruction + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40), + }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .func_info = { + { 0, MAIN_TYPE }, + { 16, MAIN_TYPE }, + { 25, CALLBACK_TYPE }, + }, + .func_info_cnt = 3, + BTF_TYPES +}, + +#undef HELPER_CALL_INSN +#undef PSEUDO_CALL_INSN +#undef CALLBACK_TYPE +#undef MAIN_TYPE +#undef BTF_TYPES -- cgit v1.2.3-59-g8ed1b From 0e1bf9ed2000c16fa8e0703e255a23d64a4adb27 Mon Sep 17 
00:00:00 2001 From: Eduard Zingerman Date: Tue, 21 Jun 2022 02:53:44 +0300 Subject: selftests/bpf: BPF test_prog selftests for bpf_loop inlining Two new test BPF programs for test_prog selftests checking bpf_loop behavior. Both are corner cases for bpf_loop inlining transformation: - check that bpf_loop behaves correctly when callback function is not a compile time constant - check that local function variables are not affected by allocating additional stack storage for registers spilled by loop inlining Signed-off-by: Eduard Zingerman Acked-by: Song Liu Link: https://lore.kernel.org/r/20220620235344.569325-6-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_loop.c | 62 ++++++++++++ tools/testing/selftests/bpf/progs/bpf_loop.c | 114 ++++++++++++++++++++++ 2 files changed, 176 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c index 380d7a2072e3..4cd8a25afe68 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c @@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel) bpf_link__destroy(link); } +static void check_non_constant_callback(struct bpf_loop *skel) +{ + struct bpf_link *link = + bpf_program__attach(skel->progs.prog_non_constant_callback); + + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->callback_selector = 0x0F; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1"); + + skel->bss->callback_selector = 0xF0; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2"); + + bpf_link__destroy(link); +} + +static void check_stack(struct bpf_loop *skel) +{ + struct bpf_link *link = bpf_program__attach(skel->progs.stack_check); + const int max_key = 12; + int key; + int map_fd; + + if (!ASSERT_OK_PTR(link, "link")) + return; + + map_fd = bpf_map__fd(skel->maps.map1); + + if (!ASSERT_GE(map_fd, 0, 
"bpf_map__fd")) + goto out; + + for (key = 1; key <= max_key; ++key) { + int val = key; + int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST); + + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + } + + usleep(1); + + for (key = 1; key <= max_key; ++key) { + int val; + int err = bpf_map_lookup_elem(map_fd, &key, &val); + + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val, key + 1, "bad value in the map")) + goto out; + } + +out: + bpf_link__destroy(link); +} + void test_bpf_loop(void) { struct bpf_loop *skel; @@ -140,6 +198,10 @@ void test_bpf_loop(void) check_invalid_flags(skel); if (test__start_subtest("check_nested_calls")) check_nested_calls(skel); + if (test__start_subtest("check_non_constant_callback")) + check_non_constant_callback(skel); + if (test__start_subtest("check_stack")) + check_stack(skel); bpf_loop__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index e08565282759..de1fc82d2710 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -11,11 +11,19 @@ struct callback_ctx { int output; }; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 32); + __type(key, int); + __type(value, int); +} map1 SEC(".maps"); + /* These should be set by the user program */ u32 nested_callback_nr_loops; u32 stop_index = -1; u32 nr_loops; int pid; +int callback_selector; /* Making these global variables so that the userspace program * can verify the output through the skeleton @@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx) return 0; } + +static int callback_set_f0(int i, void *ctx) +{ + g_output = 0xF0; + return 0; +} + +static int callback_set_0f(int i, void *ctx) +{ + g_output = 0x0F; + return 0; +} + +/* + * non-constant callback is a corner case for bpf_loop inline logic + */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int prog_non_constant_callback(void *ctx) +{ + 
struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int (*callback)(int i, void *ctx); + + g_output = 0; + + if (callback_selector == 0x0F) + callback = callback_set_0f; + else + callback = callback_set_f0; + + bpf_loop(1, callback, NULL, 0); + + return 0; +} + +static int stack_check_inner_callback(void *ctx) +{ + return 0; +} + +static int map1_lookup_elem(int key) +{ + int *val = bpf_map_lookup_elem(&map1, &key); + + return val ? *val : -1; +} + +static void map1_update_elem(int key, int val) +{ + bpf_map_update_elem(&map1, &key, &val, BPF_ANY); +} + +static int stack_check_outer_callback(void *ctx) +{ + int a = map1_lookup_elem(1); + int b = map1_lookup_elem(2); + int c = map1_lookup_elem(3); + int d = map1_lookup_elem(4); + int e = map1_lookup_elem(5); + int f = map1_lookup_elem(6); + + bpf_loop(1, stack_check_inner_callback, NULL, 0); + + map1_update_elem(1, a + 1); + map1_update_elem(2, b + 1); + map1_update_elem(3, c + 1); + map1_update_elem(4, d + 1); + map1_update_elem(5, e + 1); + map1_update_elem(6, f + 1); + + return 0; +} + +/* Some of the local variables in stack_check and + * stack_check_outer_callback would be allocated on stack by + * compiler. This test should verify that stack content for these + * variables is preserved between calls to bpf_loop (might be an issue + * if loop inlining allocates stack slots incorrectly). 
+ */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int stack_check(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int a = map1_lookup_elem(7); + int b = map1_lookup_elem(8); + int c = map1_lookup_elem(9); + int d = map1_lookup_elem(10); + int e = map1_lookup_elem(11); + int f = map1_lookup_elem(12); + + bpf_loop(1, stack_check_outer_callback, NULL, 0); + + map1_update_elem(7, a + 1); + map1_update_elem(8, b + 1); + map1_update_elem(9, c + 1); + map1_update_elem(10, d + 1); + map1_update_elem(11, e + 1); + map1_update_elem(12, f + 1); + + return 0; +} -- cgit v1.2.3-59-g8ed1b From 73087489250def7cdda2dee5ba685bdeae73b8af Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 20 Jun 2022 15:25:54 -0700 Subject: selftests/bpf: Add benchmark for local_storage get MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add benchmarks to demonstrate the performance cliff for local_storage get as the number of local_storage maps increases beyond the current local_storage implementation's cache size. "sequential get" and "interleaved get" benchmarks are added, both of which do many bpf_task_storage_get calls on sets of task local_storage maps of various counts, while considering a single specific map to be 'important' and counting task_storage_gets to the important map separately in addition to normal 'hits' count of all gets. The goal here is to mimic a scenario where a particular program using one map - the important one - is running on a system where many other local_storage maps exist and are accessed often. While "sequential get" benchmark does bpf_task_storage_get for map 0, 1, ..., {9, 99, 999} in order, "interleaved" benchmark interleaves 4 bpf_task_storage_gets for the important map for every 10 map gets. This is meant to highlight performance differences when the important map is accessed far more frequently than non-important maps. 
A "hashmap control" benchmark is also included for easy comparison of standard bpf hashmap lookup vs local_storage get. The benchmark is similar to "sequential get", but creates and uses BPF_MAP_TYPE_HASH instead of local storage. Only one inner map is created - a hashmap meant to hold tid -> data mapping for all tasks. Size of the hashmap is hardcoded to my system's PID_MAX_LIMIT (4,194,304). The number of these keys which are actually fetched as part of the benchmark is configurable. Addition of this benchmark is inspired by conversation with Alexei in a previous patchset's thread [0], which highlighted the need for such a benchmark to motivate and validate improvements to local_storage implementation. My approach in that series focused on improving performance for explicitly-marked 'important' maps and was rejected with feedback to make more generally-applicable improvements while avoiding explicitly marking maps as important. Thus the benchmark reports both general and important-map-focused metrics, so effect of future work on both is clear. Regarding the benchmark results. 
On a powerful system (Skylake, 20 cores, 256gb ram): Hashmap Control =============== num keys: 10 hashmap (control) sequential get: hits throughput: 20.900 ± 0.334 M ops/s, hits latency: 47.847 ns/op, important_hits throughput: 20.900 ± 0.334 M ops/s num keys: 1000 hashmap (control) sequential get: hits throughput: 13.758 ± 0.219 M ops/s, hits latency: 72.683 ns/op, important_hits throughput: 13.758 ± 0.219 M ops/s num keys: 10000 hashmap (control) sequential get: hits throughput: 6.995 ± 0.034 M ops/s, hits latency: 142.959 ns/op, important_hits throughput: 6.995 ± 0.034 M ops/s num keys: 100000 hashmap (control) sequential get: hits throughput: 4.452 ± 0.371 M ops/s, hits latency: 224.635 ns/op, important_hits throughput: 4.452 ± 0.371 M ops/s num keys: 4194304 hashmap (control) sequential get: hits throughput: 3.043 ± 0.033 M ops/s, hits latency: 328.587 ns/op, important_hits throughput: 3.043 ± 0.033 M ops/s Local Storage ============= num_maps: 1 local_storage cache sequential get: hits throughput: 47.298 ± 0.180 M ops/s, hits latency: 21.142 ns/op, important_hits throughput: 47.298 ± 0.180 M ops/s local_storage cache interleaved get: hits throughput: 55.277 ± 0.888 M ops/s, hits latency: 18.091 ns/op, important_hits throughput: 55.277 ± 0.888 M ops/s num_maps: 10 local_storage cache sequential get: hits throughput: 40.240 ± 0.802 M ops/s, hits latency: 24.851 ns/op, important_hits throughput: 4.024 ± 0.080 M ops/s local_storage cache interleaved get: hits throughput: 48.701 ± 0.722 M ops/s, hits latency: 20.533 ns/op, important_hits throughput: 17.393 ± 0.258 M ops/s num_maps: 16 local_storage cache sequential get: hits throughput: 44.515 ± 0.708 M ops/s, hits latency: 22.464 ns/op, important_hits throughput: 2.782 ± 0.044 M ops/s local_storage cache interleaved get: hits throughput: 49.553 ± 2.260 M ops/s, hits latency: 20.181 ns/op, important_hits throughput: 15.767 ± 0.719 M ops/s num_maps: 17 local_storage cache sequential get: hits throughput: 38.778 ± 
0.302 M ops/s, hits latency: 25.788 ns/op, important_hits throughput: 2.284 ± 0.018 M ops/s local_storage cache interleaved get: hits throughput: 43.848 ± 1.023 M ops/s, hits latency: 22.806 ns/op, important_hits throughput: 13.349 ± 0.311 M ops/s num_maps: 24 local_storage cache sequential get: hits throughput: 19.317 ± 0.568 M ops/s, hits latency: 51.769 ns/op, important_hits throughput: 0.806 ± 0.024 M ops/s local_storage cache interleaved get: hits throughput: 24.397 ± 0.272 M ops/s, hits latency: 40.989 ns/op, important_hits throughput: 6.863 ± 0.077 M ops/s num_maps: 32 local_storage cache sequential get: hits throughput: 13.333 ± 0.135 M ops/s, hits latency: 75.000 ns/op, important_hits throughput: 0.417 ± 0.004 M ops/s local_storage cache interleaved get: hits throughput: 16.898 ± 0.383 M ops/s, hits latency: 59.178 ns/op, important_hits throughput: 4.717 ± 0.107 M ops/s num_maps: 100 local_storage cache sequential get: hits throughput: 6.360 ± 0.107 M ops/s, hits latency: 157.233 ns/op, important_hits throughput: 0.064 ± 0.001 M ops/s local_storage cache interleaved get: hits throughput: 7.303 ± 0.362 M ops/s, hits latency: 136.930 ns/op, important_hits throughput: 1.907 ± 0.094 M ops/s num_maps: 1000 local_storage cache sequential get: hits throughput: 0.452 ± 0.010 M ops/s, hits latency: 2214.022 ns/op, important_hits throughput: 0.000 ± 0.000 M ops/s local_storage cache interleaved get: hits throughput: 0.542 ± 0.007 M ops/s, hits latency: 1843.341 ns/op, important_hits throughput: 0.136 ± 0.002 M ops/s Looking at the "sequential get" results, it's clear that as the number of task local_storage maps grows beyond the current cache size (16), there's a significant reduction in hits throughput. Note that current local_storage implementation assigns a cache_idx to maps as they are created. 
Since "sequential get" is creating maps 0..n in order and then doing bpf_task_storage_get calls in the same order, the benchmark is effectively ensuring that a map will not be in cache when the program tries to access it. For "interleaved get" results, important-map hits throughput is greatly increased as the important map is more likely to be in cache by virtue of being accessed far more frequently. Throughput still reduces as # maps increases, though. To get a sense of the overhead of the benchmark program, I commented out bpf_task_storage_get/bpf_map_lookup_elem in local_storage_bench.c and ran the benchmark on the same host as the 'real' run. Results: Hashmap Control =============== num keys: 10 hashmap (control) sequential get: hits throughput: 54.288 ± 0.655 M ops/s, hits latency: 18.420 ns/op, important_hits throughput: 54.288 ± 0.655 M ops/s num keys: 1000 hashmap (control) sequential get: hits throughput: 52.913 ± 0.519 M ops/s, hits latency: 18.899 ns/op, important_hits throughput: 52.913 ± 0.519 M ops/s num keys: 10000 hashmap (control) sequential get: hits throughput: 53.480 ± 1.235 M ops/s, hits latency: 18.699 ns/op, important_hits throughput: 53.480 ± 1.235 M ops/s num keys: 100000 hashmap (control) sequential get: hits throughput: 54.982 ± 1.902 M ops/s, hits latency: 18.188 ns/op, important_hits throughput: 54.982 ± 1.902 M ops/s num keys: 4194304 hashmap (control) sequential get: hits throughput: 50.858 ± 0.707 M ops/s, hits latency: 19.662 ns/op, important_hits throughput: 50.858 ± 0.707 M ops/s Local Storage ============= num_maps: 1 local_storage cache sequential get: hits throughput: 110.990 ± 4.828 M ops/s, hits latency: 9.010 ns/op, important_hits throughput: 110.990 ± 4.828 M ops/s local_storage cache interleaved get: hits throughput: 161.057 ± 4.090 M ops/s, hits latency: 6.209 ns/op, important_hits throughput: 161.057 ± 4.090 M ops/s num_maps: 10 local_storage cache sequential get: hits throughput: 112.930 ± 1.079 M ops/s, hits latency: 
8.855 ns/op, important_hits throughput: 11.293 ± 0.108 M ops/s local_storage cache interleaved get: hits throughput: 115.841 ± 2.088 M ops/s, hits latency: 8.633 ns/op, important_hits throughput: 41.372 ± 0.746 M ops/s num_maps: 16 local_storage cache sequential get: hits throughput: 115.653 ± 0.416 M ops/s, hits latency: 8.647 ns/op, important_hits throughput: 7.228 ± 0.026 M ops/s local_storage cache interleaved get: hits throughput: 138.717 ± 1.649 M ops/s, hits latency: 7.209 ns/op, important_hits throughput: 44.137 ± 0.525 M ops/s num_maps: 17 local_storage cache sequential get: hits throughput: 112.020 ± 1.649 M ops/s, hits latency: 8.927 ns/op, important_hits throughput: 6.598 ± 0.097 M ops/s local_storage cache interleaved get: hits throughput: 128.089 ± 1.960 M ops/s, hits latency: 7.807 ns/op, important_hits throughput: 38.995 ± 0.597 M ops/s num_maps: 24 local_storage cache sequential get: hits throughput: 92.447 ± 5.170 M ops/s, hits latency: 10.817 ns/op, important_hits throughput: 3.855 ± 0.216 M ops/s local_storage cache interleaved get: hits throughput: 128.844 ± 2.808 M ops/s, hits latency: 7.761 ns/op, important_hits throughput: 36.245 ± 0.790 M ops/s num_maps: 32 local_storage cache sequential get: hits throughput: 102.042 ± 1.462 M ops/s, hits latency: 9.800 ns/op, important_hits throughput: 3.194 ± 0.046 M ops/s local_storage cache interleaved get: hits throughput: 126.577 ± 1.818 M ops/s, hits latency: 7.900 ns/op, important_hits throughput: 35.332 ± 0.507 M ops/s num_maps: 100 local_storage cache sequential get: hits throughput: 111.327 ± 1.401 M ops/s, hits latency: 8.983 ns/op, important_hits throughput: 1.113 ± 0.014 M ops/s local_storage cache interleaved get: hits throughput: 131.327 ± 1.339 M ops/s, hits latency: 7.615 ns/op, important_hits throughput: 34.302 ± 0.350 M ops/s num_maps: 1000 local_storage cache sequential get: hits throughput: 101.978 ± 0.563 M ops/s, hits latency: 9.806 ns/op, important_hits throughput: 0.102 ± 0.001 M 
ops/s local_storage cache interleaved get: hits throughput: 141.084 ± 1.098 M ops/s, hits latency: 7.088 ns/op, important_hits throughput: 35.430 ± 0.276 M ops/s Adjusting for overhead, latency numbers for "hashmap control" and "sequential get" are: hashmap_control_1k: ~53.8ns hashmap_control_10k: ~124.2ns hashmap_control_100k: ~206.5ns sequential_get_1: ~12.1ns sequential_get_10: ~16.0ns sequential_get_16: ~13.8ns sequential_get_17: ~16.8ns sequential_get_24: ~40.9ns sequential_get_32: ~65.2ns sequential_get_100: ~148.2ns sequential_get_1000: ~2204ns These numbers clearly demonstrate a performance cliff as the number of maps grows past the cache size. In the discussion for v1 of this patch, Alexei noted that local_storage was 2.5x faster than a large hashmap when initially implemented [1]. The benchmark results show that local_storage is 5-10x faster: a long-running BPF application putting some pid-specific info into a hashmap for each pid it sees will probably see on the order of 10-100k pids. Bench numbers for hashmaps of this size are ~10x slower than sequential_get_16, but as the number of local_storage maps grows far past local_storage cache size the performance advantage shrinks and eventually reverses. When running the benchmarks it may be necessary to bump the 'open files' ulimit for a successful run. 
[0]: https://lore.kernel.org/all/20220420002143.1096548-1-davemarchevsky@fb.com [1]: https://lore.kernel.org/bpf/20220511173305.ftldpn23m4ski3d3@MBP-98dd607d3435.dhcp.thefacebook.com/ Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20220620222554.270578-1-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/bench.c | 55 ++++ tools/testing/selftests/bpf/bench.h | 4 + .../selftests/bpf/benchs/bench_local_storage.c | 287 +++++++++++++++++++++ .../bpf/benchs/run_bench_local_storage.sh | 24 ++ tools/testing/selftests/bpf/benchs/run_common.sh | 17 ++ .../selftests/bpf/progs/local_storage_bench.c | 104 ++++++++ 7 files changed, 494 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/benchs/bench_local_storage.c create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh create mode 100644 tools/testing/selftests/bpf/progs/local_storage_bench.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cb8e552e1418..4fbd88a8ed9e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -571,6 +571,7 @@ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h +$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -583,7 +584,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_bloom_filter_map.o \ $(OUTPUT)/bench_bpf_loop.o \ $(OUTPUT)/bench_strncmp.o \ - $(OUTPUT)/bench_bpf_hashmap_full_update.o + $(OUTPUT)/bench_bpf_hashmap_full_update.o \ + 
$(OUTPUT)/bench_local_storage.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index d8aa62be996b..1e7b5d4b1f11 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -150,6 +150,53 @@ void ops_report_final(struct bench_res res[], int res_cnt) printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); } +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double important_hits_per_sec, hits_per_sec; + double delta_sec = delta_ns / 1000000000.0; + + hits_per_sec = res->hits / 1000000.0 / delta_sec; + important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec; + + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s ", hits_per_sec); + printf("important_hits %8.3lfM/s\n", important_hits_per_sec); +} + +void local_storage_report_final(struct bench_res res[], int res_cnt) +{ + double important_hits_mean = 0.0, important_hits_stddev = 0.0; + double hits_mean = 0.0, hits_stddev = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) { + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + important_hits_stddev += + (important_hits_mean - res[i].important_hits / 1000000.0) * + (important_hits_mean - res[i].important_hits / 1000000.0) / + (res_cnt - 1.0); + } + + hits_stddev = sqrt(hits_stddev); + important_hits_stddev = sqrt(important_hits_stddev); + } + printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ", + hits_mean, hits_stddev); + printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean); + printf("important_hits throughput %8.3lf 
\u00B1 %5.3lf M ops/s\n", + important_hits_mean, important_hits_stddev); +} + const char *argp_program_version = "benchmark"; const char *argp_program_bug_address = ""; const char argp_program_doc[] = @@ -188,12 +235,14 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; extern struct argp bench_bpf_loop_argp; +extern struct argp bench_local_storage_argp; extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, + { &bench_local_storage_argp, 0, "local_storage benchmark", 0 }, { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, {}, }; @@ -397,6 +446,9 @@ extern const struct bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; extern const struct bench bench_bpf_hashmap_full_update; +extern const struct bench bench_local_storage_cache_seq_get; +extern const struct bench bench_local_storage_cache_interleaved_get; +extern const struct bench bench_local_storage_cache_hashmap_control; static const struct bench *benchs[] = { &bench_count_global, @@ -432,6 +484,9 @@ static const struct bench *benchs[] = { &bench_strncmp_no_helper, &bench_strncmp_helper, &bench_bpf_hashmap_full_update, + &bench_local_storage_cache_seq_get, + &bench_local_storage_cache_interleaved_get, + &bench_local_storage_cache_hashmap_control, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index fb3e213df3dc..4b15286753ba 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -34,6 +34,7 @@ struct bench_res { long hits; long drops; long false_hits; + long important_hits; }; struct bench { @@ -61,6 +62,9 @@ void 
false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); void false_hits_report_final(struct bench_res res[], int res_cnt); void ops_report_progress(int iter, struct bench_res *res, long delta_ns); void ops_report_final(struct bench_res res[], int res_cnt); +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns); +void local_storage_report_final(struct bench_res res[], int res_cnt); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c new file mode 100644 index 000000000000..5a378c84e81f --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include +#include + +#include "local_storage_bench.skel.h" +#include "bench.h" + +#include + +static struct { + __u32 nr_maps; + __u32 hashmap_nr_keys_used; +} args = { + .nr_maps = 1000, + .hashmap_nr_keys_used = 1000, +}; + +enum { + ARG_NR_MAPS = 6000, + ARG_HASHMAP_NR_KEYS_USED = 6001, +}; + +static const struct argp_option opts[] = { + { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0, + "Set number of local_storage maps"}, + { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS", + 0, "When doing hashmap test, set number of hashmap keys test uses"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_MAPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_maps"); + argp_usage(state); + } + args.nr_maps = ret; + break; + case ARG_HASHMAP_NR_KEYS_USED: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid hashmap_nr_keys_used"); + argp_usage(state); + } + args.hashmap_nr_keys_used = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const 
struct argp bench_local_storage_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Keep in sync w/ array of maps in bpf */ +#define MAX_NR_MAPS 1000 +/* keep in sync w/ same define in bpf */ +#define HASHMAP_SZ 4194304 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_maps > MAX_NR_MAPS) { + fprintf(stderr, "nr_maps must be <= 1000\n"); + exit(1); + } + + if (args.hashmap_nr_keys_used > HASHMAP_SZ) { + fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ); + exit(1); + } +} + +static struct { + struct local_storage_bench *skel; + void *bpf_obj; + struct bpf_map *array_of_maps; +} ctx; + +static void prepopulate_hashmap(int fd) +{ + int i, key, val; + + /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so + * populate the hashmap for a similar comparison + */ + for (i = 0; i < HASHMAP_SZ; i++) { + key = val = i; + if (bpf_map_update_elem(fd, &key, &val, 0)) { + fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key); + exit(1); + } + } +} + +static void __setup(struct bpf_program *prog, bool hashmap) +{ + struct bpf_map *inner_map; + int i, fd, mim_fd, err; + + LIBBPF_OPTS(bpf_map_create_opts, create_opts); + + if (!hashmap) + create_opts.map_flags = BPF_F_NO_PREALLOC; + + ctx.skel->rodata->num_maps = args.nr_maps; + ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used; + inner_map = bpf_map__inner_map(ctx.array_of_maps); + create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map); + create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map); + + err = local_storage_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "Error loading skeleton\n"); + goto err_out; + } + + create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj); + + mim_fd = 
bpf_map__fd(ctx.array_of_maps); + if (mim_fd < 0) { + fprintf(stderr, "Error getting map_in_map fd\n"); + goto err_out; + } + + for (i = 0; i < args.nr_maps; i++) { + if (hashmap) + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), HASHMAP_SZ, &create_opts); + else + fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int), + sizeof(int), 0, &create_opts); + if (fd < 0) { + fprintf(stderr, "Error creating map %d: %d\n", i, fd); + goto err_out; + } + + if (hashmap) + prepopulate_hashmap(fd); + + err = bpf_map_update_elem(mim_fd, &i, &fd, 0); + if (err) { + fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i); + goto err_out; + } + } + + if (!bpf_program__attach(prog)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void hashmap_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_hash_maps; + skel->rodata->use_hashmap = 1; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, true); +} + +static void local_storage_cache_get_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, false); +} + +static void local_storage_cache_get_interleaved_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 1; + + __setup(skel->progs.get_local, false); +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + res->important_hits = 
atomic_swap(&ctx.skel->bss->important_hits, 0); +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +/* cache sequential and interleaved get benchs test local_storage get + * performance, specifically they demonstrate performance cliff of + * current list-plus-cache local_storage model. + * + * cache sequential get: call bpf_task_storage_get on n maps in order + * cache interleaved get: like "sequential get", but interleave 4 calls to the + * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal + * is to mimic environment where many progs are accessing their local_storage + * maps, with 'our' prog needing to access its map more often than others + */ +const struct bench bench_local_storage_cache_seq_get = { + .name = "local-storage-cache-seq-get", + .validate = validate, + .setup = local_storage_cache_get_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_interleaved_get = { + .name = "local-storage-cache-int-get", + .validate = validate, + .setup = local_storage_cache_get_interleaved_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_hashmap_control = { + .name = "local-storage-cache-hashmap-control", + .validate = validate, + .setup = hashmap_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; diff --git 
a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh new file mode 100755 index 000000000000..2eb2b513a173 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Hashmap Control" +for i in 10 1000 10000 100000 4194304; do +subtitle "num keys: $i" + summarize_local_storage "hashmap (control) sequential get: "\ + "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)" + printf "\n" +done + +header "Local Storage" +for i in 1 10 16 17 24 32 100 1000; do +subtitle "num_maps: $i" + summarize_local_storage "local_storage cache sequential get: "\ + "$(./bench --nr_maps $i local-storage-cache-seq-get)" + summarize_local_storage "local_storage cache interleaved get: "\ + "$(./bench --nr_maps $i local-storage-cache-int-get)" + printf "\n" +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 6c5e6023a69f..d9f40af82006 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -41,6 +41,16 @@ function ops() echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" } +function local_storage() +{ + echo -n "hits throughput: " + echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", hits latency: " + echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" + echo -n ", important_hits throughput: " + echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" +} + function total() { echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" @@ -67,6 +77,13 @@ function summarize_ops() printf "%-20s %s\n" "$bench" "$(ops $summary)" } 
+function summarize_local_storage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(local_storage $summary)" +} + function summarize_total() { bench="$1" diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c new file mode 100644 index 000000000000..2c3234c5b73a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define HASHMAP_SZ 4194304 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); + }); +} array_of_local_storage_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, HASHMAP_SZ); + __type(key, int); + __type(value, int); + }); +} array_of_hash_maps SEC(".maps"); + +long important_hits; +long hits; + +/* set from user-space */ +const volatile unsigned int use_hashmap; +const volatile unsigned int hashmap_num_keys; +const volatile unsigned int num_maps; +const volatile unsigned int interleave; + +struct loop_ctx { + struct task_struct *task; + long loop_hits; + long loop_important_hits; +}; + +static int do_lookup(unsigned int elem, struct loop_ctx *lctx) +{ + void *map, *inner_map; + int idx = 0; + + if (use_hashmap) + map = &array_of_hash_maps; + else + map = &array_of_local_storage_maps; + + inner_map = bpf_map_lookup_elem(map, &elem); + if (!inner_map) + return -1; + + if (use_hashmap) { + idx = bpf_get_prandom_u32() % hashmap_num_keys; + 
bpf_map_lookup_elem(inner_map, &idx); + } else { + bpf_task_storage_get(inner_map, lctx->task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + } + + lctx->loop_hits++; + if (!elem) + lctx->loop_important_hits++; + return 0; +} + +static long loop(u32 index, void *ctx) +{ + struct loop_ctx *lctx = (struct loop_ctx *)ctx; + unsigned int map_idx = index % num_maps; + + do_lookup(map_idx, lctx); + if (interleave && map_idx % 3 == 0) + do_lookup(0, lctx); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct loop_ctx lctx; + + lctx.task = bpf_get_current_task_btf(); + lctx.loop_hits = 0; + lctx.loop_important_hits = 0; + bpf_loop(10000, &loop, &lctx, 0); + __sync_add_and_fetch(&hits, lctx.loop_hits); + __sync_add_and_fetch(&important_hits, lctx.loop_important_hits); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From 6e945d57cc9f6e27893d57a419434a2859ba6f3f Mon Sep 17 00:00:00 2001 From: Jörn-Thorben Hinz Date: Wed, 22 Jun 2022 21:12:25 +0200 Subject: selftests/bpf: Test a BPF CC writing sk_pacing_* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test whether a TCP CC implemented in BPF is allowed to write sk_pacing_rate and sk_pacing_status in struct sock. This is needed when cong_control() is implemented and used. 
Signed-off-by: Jörn-Thorben Hinz Link: https://lore.kernel.org/r/20220622191227.898118-4-jthinz@mailbox.tu-berlin.de Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 19 +++++++ .../selftests/bpf/progs/tcp_ca_write_sk_pacing.c | 60 ++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e9a9a31b2ffe..e79f3f5a9d33 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -9,6 +9,7 @@ #include "bpf_cubic.skel.h" #include "bpf_tcp_nogpl.skel.h" #include "bpf_dctcp_release.skel.h" +#include "tcp_ca_write_sk_pacing.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -322,6 +323,22 @@ static void test_rel_setsockopt(void) bpf_dctcp_release__destroy(rel_skel); } +static void test_write_sk_pacing(void) +{ + struct tcp_ca_write_sk_pacing *skel; + struct bpf_link *link; + + skel = tcp_ca_write_sk_pacing__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_write_sk_pacing__destroy(skel); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -334,4 +351,6 @@ void test_bpf_tcp_ca(void) test_dctcp_fallback(); if (test__start_subtest("rel_setsockopt")) test_rel_setsockopt(); + if (test__start_subtest("write_sk_pacing")) + test_write_sk_pacing(); } diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c new file mode 100644 index 000000000000..43447704cf0e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + 
+#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define USEC_PER_SEC 1000000UL + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/write_sk_pacing_init") +void BPF_PROG(write_sk_pacing_init, struct sock *sk) +{ +#ifdef ENABLE_ATOMICS_TESTS + __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE, + SK_PACING_NEEDED); +#else + sk->sk_pacing_status = SK_PACING_NEEDED; +#endif +} + +SEC("struct_ops/write_sk_pacing_cong_control") +void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ + const struct tcp_sock *tp = tcp_sk(sk); + unsigned long rate = + ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) / + (tp->srtt_us ?: 1U << 3); + sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate); +} + +SEC("struct_ops/write_sk_pacing_ssthresh") +__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/write_sk_pacing_undo_cwnd") +__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops write_sk_pacing = { + .init = (void *)write_sk_pacing_init, + .cong_control = (void *)write_sk_pacing_cong_control, + .ssthresh = (void *)write_sk_pacing_ssthresh, + .undo_cwnd = (void *)write_sk_pacing_undo_cwnd, + .name = "bpf_w_sk_pacing", +}; -- cgit v1.2.3-59-g8ed1b From 0735627d78caa56f219dc14608ce0bdbd045e07e Mon Sep 17 00:00:00 2001 From: Jörn-Thorben Hinz Date: Wed, 22 Jun 2022 21:12:26 +0200 Subject: selftests/bpf: Test an incomplete BPF CC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test whether a TCP CC implemented in BPF providing neither cong_avoid() nor cong_control() is correctly rejected. 
This check solely depends on tcp_register_congestion_control() now, which is invoked during bpf_map__attach_struct_ops(). Signed-off-by: Jörn-Thorben Hinz Link: https://lore.kernel.org/r/20220622191227.898118-5-jthinz@mailbox.tu-berlin.de Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 22 ++++++++++++++ .../selftests/bpf/progs/tcp_ca_incompl_cong_ops.c | 35 ++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e79f3f5a9d33..194d07310531 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -10,6 +10,7 @@ #include "bpf_tcp_nogpl.skel.h" #include "bpf_dctcp_release.skel.h" #include "tcp_ca_write_sk_pacing.skel.h" +#include "tcp_ca_incompl_cong_ops.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -339,6 +340,25 @@ static void test_write_sk_pacing(void) tcp_ca_write_sk_pacing__destroy(skel); } +static void test_incompl_cong_ops(void) +{ + struct tcp_ca_incompl_cong_ops *skel; + struct bpf_link *link; + + skel = tcp_ca_incompl_cong_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + /* That cong_avoid() and cong_control() are missing is only reported at + * this point: + */ + link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops); + ASSERT_ERR_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_incompl_cong_ops__destroy(skel); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -353,4 +373,6 @@ void test_bpf_tcp_ca(void) test_rel_setsockopt(); if (test__start_subtest("write_sk_pacing")) test_write_sk_pacing(); + if (test__start_subtest("incompl_cong_ops")) + test_incompl_cong_ops(); } diff --git 
a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c new file mode 100644 index 000000000000..7bb872fb22dd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/incompl_cong_ops_ssthresh") +__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/incompl_cong_ops_undo_cwnd") +__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops incompl_cong_ops = { + /* Intentionally leaving out any of the required cong_avoid() and + * cong_control() here. + */ + .ssthresh = (void *)incompl_cong_ops_ssthresh, + .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd, + .name = "bpf_incompl_ops", +}; -- cgit v1.2.3-59-g8ed1b From f14a3f644a1c5a2e2dbe6073f51793119a12e6ce Mon Sep 17 00:00:00 2001 From: Jörn-Thorben Hinz Date: Wed, 22 Jun 2022 21:12:27 +0200 Subject: selftests/bpf: Test a BPF CC implementing the unsupported get_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test whether a TCP CC implemented in BPF providing get_info() is rejected correctly. get_info() is unsupported in a BPF CC. The check for required functions in a BPF CC has moved; this test ensures unsupported functions are still rejected correctly. 
Signed-off-by: Jörn-Thorben Hinz Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220622191227.898118-6-jthinz@mailbox.tu-berlin.de Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 20 ++++++++++++++++++++ .../selftests/bpf/progs/tcp_ca_unsupp_cong_op.c | 21 +++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 194d07310531..2959a52ced06 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -11,6 +11,7 @@ #include "bpf_dctcp_release.skel.h" #include "tcp_ca_write_sk_pacing.skel.h" #include "tcp_ca_incompl_cong_ops.skel.h" +#include "tcp_ca_unsupp_cong_op.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -359,6 +360,23 @@ static void test_incompl_cong_ops(void) tcp_ca_incompl_cong_ops__destroy(skel); } +static void test_unsupp_cong_op(void) +{ + libbpf_print_fn_t old_print_fn; + struct tcp_ca_unsupp_cong_op *skel; + + err_str = "attach to unsupported member get_info"; + found = false; + old_print_fn = libbpf_set_print(libbpf_debug_print); + + skel = tcp_ca_unsupp_cong_op__open_and_load(); + ASSERT_NULL(skel, "open_and_load"); + ASSERT_EQ(found, true, "expected_err_msg"); + + tcp_ca_unsupp_cong_op__destroy(skel); + libbpf_set_print(old_print_fn); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -375,4 +393,6 @@ void test_bpf_tcp_ca(void) test_write_sk_pacing(); if (test__start_subtest("incompl_cong_ops")) test_incompl_cong_ops(); + if (test__start_subtest("unsupp_cong_op")) + test_unsupp_cong_op(); } diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c new file mode 100644 index 000000000000..c06f4a41c21a --- 
/dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/unsupp_cong_op_get_info") +size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + return 0; +} + +SEC(".struct_ops") +struct tcp_congestion_ops unsupp_cong_op = { + .get_info = (void *)unsupp_cong_op_get_info, + .name = "bpf_unsupp_op", +}; -- cgit v1.2.3-59-g8ed1b From 6dc7a0baf1a70b7d22662d38481824c14ddd80c5 Mon Sep 17 00:00:00 2001 From: Jörn-Thorben Hinz Date: Tue, 21 Jun 2022 09:01:16 +0200 Subject: selftests/bpf: Fix rare segfault in sock_fields prog test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test_sock_fields__detach() got called with a null pointer here when one of the CHECKs or ASSERTs up to the test_sock_fields__open_and_load() call resulted in a jump to the "done" label. A skeleton's *__detach() is not safe to call with a null pointer, though. This led to a segfault. Go the easy route and only call test_sock_fields__destroy() which is null-pointer safe and includes detaching. Came across this while looking[1] to introduce the usage of bpf_tcp_helpers.h (included in progs/test_sock_fields.c) together with vmlinux.h.
[1] https://lore.kernel.org/bpf/629bc069dd807d7ac646f836e9dca28bbc1108e2.camel@mailbox.tu-berlin.de/ Fixes: 8f50f16ff39d ("selftests/bpf: Extend verifier and bpf_sock tests for dst_port loads") Signed-off-by: Jörn-Thorben Hinz Signed-off-by: Andrii Nakryiko Reviewed-by: Jakub Sitnicki Reviewed-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20220621070116.307221-1-jthinz@mailbox.tu-berlin.de --- tools/testing/selftests/bpf/prog_tests/sock_fields.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9d211b5c22c4..7d23166c77af 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -394,7 +394,6 @@ void serial_test_sock_fields(void) test(); done: - test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); -- cgit v1.2.3-59-g8ed1b From 41188e9e9defa1678abbf860ad7f6dd1ba48ad1c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Fri, 24 Jun 2022 05:06:13 +0300 Subject: selftest/bpf: Test for use-after-free bug fix in inline_bpf_loop This test verifies that bpf_loop() inlining works as expected when address of `env->prog` is updated. This address is updated upon BPF program reallocation. Reallocation is handled by bpf_prog_realloc(), which reuses old memory if page boundary is not crossed. The value of `len` in the test is chosen to cross this boundary on bpf_loop() patching. Verify that the use-after-free bug in inline_bpf_loop() reported by Dan Carpenter is fixed. 
Signed-off-by: Eduard Zingerman Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220624020613.548108-3-eddyz87@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 39 ++++++++++++++++++++++ .../selftests/bpf/verifier/bpf_loop_inline.c | 11 ++++++ 2 files changed, 50 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 7fe897c66d81..f9d553fbf68a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -425,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self) } } +static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + /* This test was added to catch a specific use after free + * error, which happened upon BPF program reallocation. + * Reallocation is handled by core.c:bpf_prog_realloc, which + * reuses old memory if page boundary is not crossed. The + * value of `len` is chosen to cross this boundary on bpf_loop + * patching. 
+ */ + const int len = getpagesize() - 25; + int callback_load_idx; + int callback_idx; + int i = 0; + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1); + callback_load_idx = i; + insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, + BPF_REG_2, BPF_PSEUDO_FUNC, 0, + 777 /* filled below */); + insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop); + + while (i < len - 3) + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + callback_idx = i; + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1; + self->func_info[1].insn_off = callback_idx; + self->prog_len = i; + assert(i == len); +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c index 232da07c93b5..2d0023659d88 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -244,6 +244,17 @@ .func_info_cnt = 3, BTF_TYPES }, +{ + "inline bpf_loop call in a big program", + .insns = {}, + .fill_helper = bpf_fill_big_prog_with_loop_1, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .result = ACCEPT, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, #undef HELPER_CALL_INSN #undef PSEUDO_CALL_INSN -- cgit v1.2.3-59-g8ed1b From 1da9e27415bfc54db25c8374331aaf5321185a1d Mon Sep 17 00:00:00 2001 From: liujing Date: Wed, 22 Jun 2022 08:12:37 -0400 Subject: tc-testing: gitignore, delete plugins directory when we modifying kernel, commit it to our environment
building. we find an error that is "tools/testing/selftests/tc-testing/plugins" failed: No such file or directory" we find plugins directory is ignored in "tools/testing/selftests/tc-testing/.gitignore", but the plugins directory is needed in "tools/testing/selftests/tc-testing/Makefile" Signed-off-by: liujing Link: https://lore.kernel.org/r/20220622121237.5832-1-liujing@cmss.chinamobile.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/tc-testing/.gitignore | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index d52f65de23b4..9fe1cef72728 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only __pycache__/ *.pyc -plugins/ *.xml *.tap tdc_config_local.py -- cgit v1.2.3-59-g8ed1b From f36600634282a519e1b0abea609acdc8731515d7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Jun 2022 14:15:13 -0700 Subject: libbpf: move xsk.{c,h} into selftests/bpf Remove deprecated xsk APIs from libbpf. But given we have selftests relying on this, move those files (with minimal adjustments to make them compilable) under selftests/bpf. We also remove all the removed APIs from libbpf.map, while overall keeping version inheritance chain, as most APIs are backwards compatible so there is no need to reassign them as LIBBPF_1.0.0 versions.
Cc: Magnus Karlsson Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220627211527.2245459-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/Makefile | 2 +- tools/lib/bpf/libbpf.map | 12 - tools/lib/bpf/xsk.c | 1260 ----------------------------- tools/lib/bpf/xsk.h | 336 -------- tools/testing/selftests/bpf/Makefile | 2 + tools/testing/selftests/bpf/xdpxceiver.c | 2 +- tools/testing/selftests/bpf/xsk.c | 1264 ++++++++++++++++++++++++++++++ tools/testing/selftests/bpf/xsk.h | 315 ++++++++ 9 files changed, 1584 insertions(+), 1611 deletions(-) delete mode 100644 tools/lib/bpf/xsk.c delete mode 100644 tools/lib/bpf/xsk.h create mode 100644 tools/testing/selftests/bpf/xsk.c create mode 100644 tools/testing/selftests/bpf/xsk.h (limited to 'tools/testing') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 31a1a9015902..5a3dfb56d78f 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ - netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ + netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ usdt.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index a1265b152027..4c904ef0b47e 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -237,7 +237,7 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ +SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h \ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ skel_internal.h libbpf_version.h usdt.bpf.h GEN_HDRS := $(BPF_GENERATED) diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 116a2a8ee7c2..da7a4f928452 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -147,12 +147,6 @@ 
LIBBPF_0.0.2 { btf_ext__new; btf_ext__reloc_func_info; btf_ext__reloc_line_info; - xsk_umem__create; - xsk_socket__create; - xsk_umem__delete; - xsk_socket__delete; - xsk_umem__fd; - xsk_socket__fd; bpf_program__get_prog_info_linear; bpf_program__bpil_addr_to_offs; bpf_program__bpil_offs_to_addr; @@ -183,7 +177,6 @@ LIBBPF_0.0.4 { perf_buffer__new; perf_buffer__new_raw; perf_buffer__poll; - xsk_umem__create; } LIBBPF_0.0.3; LIBBPF_0.0.5 { @@ -336,7 +329,6 @@ LIBBPF_0.2.0 { perf_buffer__buffer_fd; perf_buffer__epoll_fd; perf_buffer__consume_buffer; - xsk_socket__create_shared; } LIBBPF_0.1.0; LIBBPF_0.3.0 { @@ -348,8 +340,6 @@ LIBBPF_0.3.0 { btf__new_empty_split; btf__new_split; ring_buffer__epoll_fd; - xsk_setup_xdp_prog; - xsk_socket__update_xskmap; } LIBBPF_0.2.0; LIBBPF_0.4.0 { @@ -468,6 +458,4 @@ LIBBPF_1.0.0 { libbpf_bpf_link_type_str; libbpf_bpf_map_type_str; libbpf_bpf_prog_type_str; - - local: *; }; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c deleted file mode 100644 index af136f73b09d..000000000000 --- a/tools/lib/bpf/xsk.c +++ /dev/null @@ -1,1260 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * AF_XDP user-space access library. - * - * Copyright(c) 2018 - 2019 Intel Corporation. 
- * - * Author(s): Magnus Karlsson - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "bpf.h" -#include "libbpf.h" -#include "libbpf_internal.h" -#include "xsk.h" - -/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal - * uses of deprecated APIs - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - -#ifndef SOL_XDP - #define SOL_XDP 283 -#endif - -#ifndef AF_XDP - #define AF_XDP 44 -#endif - -#ifndef PF_XDP - #define PF_XDP AF_XDP -#endif - -enum xsk_prog { - XSK_PROG_FALLBACK, - XSK_PROG_REDIRECT_FLAGS, -}; - -struct xsk_umem { - struct xsk_ring_prod *fill_save; - struct xsk_ring_cons *comp_save; - char *umem_area; - struct xsk_umem_config config; - int fd; - int refcount; - struct list_head ctx_list; - bool rx_ring_setup_done; - bool tx_ring_setup_done; -}; - -struct xsk_ctx { - struct xsk_ring_prod *fill; - struct xsk_ring_cons *comp; - __u32 queue_id; - struct xsk_umem *umem; - int refcount; - int ifindex; - struct list_head list; - int prog_fd; - int link_fd; - int xsks_map_fd; - char ifname[IFNAMSIZ]; - bool has_bpf_link; -}; - -struct xsk_socket { - struct xsk_ring_cons *rx; - struct xsk_ring_prod *tx; - __u64 outstanding_tx; - struct xsk_ctx *ctx; - struct xsk_socket_config config; - int fd; -}; - -struct xsk_nl_info { - bool xdp_prog_attached; - int ifindex; - int fd; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_ring_offset_v1 { - __u64 producer; - __u64 consumer; - __u64 desc; -}; - -/* Up until and including Linux 5.3 */ -struct xdp_mmap_offsets_v1 { - struct xdp_ring_offset_v1 rx; - struct xdp_ring_offset_v1 tx; - struct xdp_ring_offset_v1 fr; - struct xdp_ring_offset_v1 cr; -}; - -int xsk_umem__fd(const struct xsk_umem *umem) -{ - return umem ? 
umem->fd : -EINVAL; -} - -int xsk_socket__fd(const struct xsk_socket *xsk) -{ - return xsk ? xsk->fd : -EINVAL; -} - -static bool xsk_page_aligned(void *buffer) -{ - unsigned long addr = (unsigned long)buffer; - - return !(addr & (getpagesize() - 1)); -} - -static void xsk_set_umem_config(struct xsk_umem_config *cfg, - const struct xsk_umem_config *usr_cfg) -{ - if (!usr_cfg) { - cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; - cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; - cfg->flags = XSK_UMEM__DEFAULT_FLAGS; - return; - } - - cfg->fill_size = usr_cfg->fill_size; - cfg->comp_size = usr_cfg->comp_size; - cfg->frame_size = usr_cfg->frame_size; - cfg->frame_headroom = usr_cfg->frame_headroom; - cfg->flags = usr_cfg->flags; -} - -static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, - const struct xsk_socket_config *usr_cfg) -{ - if (!usr_cfg) { - cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; - cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg->libbpf_flags = 0; - cfg->xdp_flags = 0; - cfg->bind_flags = 0; - return 0; - } - - if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) - return -EINVAL; - - cfg->rx_size = usr_cfg->rx_size; - cfg->tx_size = usr_cfg->tx_size; - cfg->libbpf_flags = usr_cfg->libbpf_flags; - cfg->xdp_flags = usr_cfg->xdp_flags; - cfg->bind_flags = usr_cfg->bind_flags; - - return 0; -} - -static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) -{ - struct xdp_mmap_offsets_v1 off_v1; - - /* getsockopt on a kernel <= 5.3 has no flags fields. - * Copy over the offsets to the correct places in the >=5.4 format - * and put the flags where they would have been on that kernel. 
- */ - memcpy(&off_v1, off, sizeof(off_v1)); - - off->rx.producer = off_v1.rx.producer; - off->rx.consumer = off_v1.rx.consumer; - off->rx.desc = off_v1.rx.desc; - off->rx.flags = off_v1.rx.consumer + sizeof(__u32); - - off->tx.producer = off_v1.tx.producer; - off->tx.consumer = off_v1.tx.consumer; - off->tx.desc = off_v1.tx.desc; - off->tx.flags = off_v1.tx.consumer + sizeof(__u32); - - off->fr.producer = off_v1.fr.producer; - off->fr.consumer = off_v1.fr.consumer; - off->fr.desc = off_v1.fr.desc; - off->fr.flags = off_v1.fr.consumer + sizeof(__u32); - - off->cr.producer = off_v1.cr.producer; - off->cr.consumer = off_v1.cr.consumer; - off->cr.desc = off_v1.cr.desc; - off->cr.flags = off_v1.cr.consumer + sizeof(__u32); -} - -static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) -{ - socklen_t optlen; - int err; - - optlen = sizeof(*off); - err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); - if (err) - return err; - - if (optlen == sizeof(*off)) - return 0; - - if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { - xsk_mmap_offsets_v1(off); - return 0; - } - - return -EINVAL; -} - -static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp) -{ - struct xdp_mmap_offsets off; - void *map; - int err; - - err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, - &umem->config.fill_size, - sizeof(umem->config.fill_size)); - if (err) - return -errno; - - err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, - &umem->config.comp_size, - sizeof(umem->config.comp_size)); - if (err) - return -errno; - - err = xsk_get_mmap_offsets(fd, &off); - if (err) - return -errno; - - map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - XDP_UMEM_PGOFF_FILL_RING); - if (map == MAP_FAILED) - return -errno; - - fill->mask = umem->config.fill_size - 1; - fill->size = umem->config.fill_size; - fill->producer = map + 
off.fr.producer; - fill->consumer = map + off.fr.consumer; - fill->flags = map + off.fr.flags; - fill->ring = map + off.fr.desc; - fill->cached_cons = umem->config.fill_size; - - map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - XDP_UMEM_PGOFF_COMPLETION_RING); - if (map == MAP_FAILED) { - err = -errno; - goto out_mmap; - } - - comp->mask = umem->config.comp_size - 1; - comp->size = umem->config.comp_size; - comp->producer = map + off.cr.producer; - comp->consumer = map + off.cr.consumer; - comp->flags = map + off.cr.flags; - comp->ring = map + off.cr.desc; - - return 0; - -out_mmap: - munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); - return err; -} - -DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) -int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xdp_umem_reg mr; - struct xsk_umem *umem; - int err; - - if (!umem_area || !umem_ptr || !fill || !comp) - return -EFAULT; - if (!size && !xsk_page_aligned(umem_area)) - return -EINVAL; - - umem = calloc(1, sizeof(*umem)); - if (!umem) - return -ENOMEM; - - umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); - if (umem->fd < 0) { - err = -errno; - goto out_umem_alloc; - } - - umem->umem_area = umem_area; - INIT_LIST_HEAD(&umem->ctx_list); - xsk_set_umem_config(&umem->config, usr_config); - - memset(&mr, 0, sizeof(mr)); - mr.addr = (uintptr_t)umem_area; - mr.len = size; - mr.chunk_size = umem->config.frame_size; - mr.headroom = umem->config.frame_headroom; - mr.flags = umem->config.flags; - - err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); - if (err) { - err = -errno; - goto out_socket; - } - - err = xsk_create_umem_rings(umem, umem->fd, fill, comp); - if (err) - goto out_socket; - - umem->fill_save = fill; - umem->comp_save = 
comp; - *umem_ptr = umem; - return 0; - -out_socket: - close(umem->fd); -out_umem_alloc: - free(umem); - return err; -} - -struct xsk_umem_config_v1 { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; -}; - -COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) -int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, - __u64 size, struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *usr_config) -{ - struct xsk_umem_config config; - - memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1)); - config.flags = 0; - - return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, - &config); -} - -static enum xsk_prog get_xsk_prog(void) -{ - enum xsk_prog detected = XSK_PROG_FALLBACK; - __u32 size_out, retval, duration; - char data_in = 0, data_out; - struct bpf_insn insns[] = { - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - BPF_EXIT_INSN(), - }; - int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); - - map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); - if (map_fd < 0) - return detected; - - insns[0].imm = map_fd; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); - if (prog_fd < 0) { - close(map_fd); - return detected; - } - - ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration); - if (!ret && retval == XDP_PASS) - detected = XSK_PROG_REDIRECT_FLAGS; - close(prog_fd); - close(map_fd); - return detected; -} - -static int xsk_load_xdp_prog(struct xsk_socket *xsk) -{ - static const int log_buf_size = 16 * 1024; - struct xsk_ctx *ctx = xsk->ctx; - char log_buf[log_buf_size]; - int prog_fd; - - /* This is the fallback C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * int ret, index = ctx->rx_queue_index; - * - * // A set 
entry here means that the correspnding queue_id - * // has an active AF_XDP socket bound to it. - * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); - * if (ret > 0) - * return ret; - * - * // Fallback for pre-5.3 kernels, not supporting default - * // action in the flags parameter. - * if (bpf_map_lookup_elem(&xsks_map, &index)) - * return bpf_redirect_map(&xsks_map, index, 0); - * return XDP_PASS; - * } - */ - struct bpf_insn prog[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r2 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* if w0 != 0 goto pc+13 */ - BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), - /* r2 = r10 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - /* r2 += -4 */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* call bpf_map_lookup_elem */ - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - /* r1 = r0 */ - BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - /* r0 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto pc+5 */ - BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), - /* r2 = *(u32 *)(r10 - 4) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - /* r1 = xskmap[] */ - BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = 0 */ - BPF_MOV64_IMM(BPF_REG_3, 0), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - /* The jumps are to this instruction */ - BPF_EXIT_INSN(), - }; - - /* This is the post-5.3 kernel C-program: - * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) - * { - * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); - * } - */ - struct bpf_insn prog_redirect_flags[] = { - /* r2 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), - /* r1 = xskmap[] */ - 
BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), - /* r3 = XDP_PASS */ - BPF_MOV64_IMM(BPF_REG_3, 2), - /* call bpf_redirect_map */ - BPF_EMIT_CALL(BPF_FUNC_redirect_map), - BPF_EXIT_INSN(), - }; - size_t insns_cnt[] = {ARRAY_SIZE(prog), - ARRAY_SIZE(prog_redirect_flags), - }; - struct bpf_insn *progs[] = {prog, prog_redirect_flags}; - enum xsk_prog option = get_xsk_prog(); - LIBBPF_OPTS(bpf_prog_load_opts, opts, - .log_buf = log_buf, - .log_size = log_buf_size, - ); - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", - progs[option], insns_cnt[option], &opts); - if (prog_fd < 0) { - pr_warn("BPF log buffer:\n%s", log_buf); - return prog_fd; - } - - ctx->prog_fd = prog_fd; - return 0; -} - -static int xsk_create_bpf_link(struct xsk_socket *xsk) -{ - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); - struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; - int link_fd; - int err; - - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); - if (err) { - pr_warn("getting XDP prog id failed\n"); - return err; - } - - /* if there's a netlink-based XDP prog loaded on interface, bail out - * and ask user to do the removal by himself - */ - if (prog_id) { - pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); - return -EINVAL; - } - - opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); - - link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); - if (link_fd < 0) { - pr_warn("bpf_link_create failed: %s\n", strerror(errno)); - return link_fd; - } - - ctx->link_fd = link_fd; - return 0; -} - -static int xsk_get_max_queues(struct xsk_socket *xsk) -{ - struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; - struct xsk_ctx *ctx = xsk->ctx; - struct ifreq ifr = {}; - int fd, err, ret; - - fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); - if (fd < 0) - return -errno; - - ifr.ifr_data = (void *)&channels; - 
libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); - err = ioctl(fd, SIOCETHTOOL, &ifr); - if (err && errno != EOPNOTSUPP) { - ret = -errno; - goto out; - } - - if (err) { - /* If the device says it has no channels, then all traffic - * is sent to a single stream, so max queues = 1. - */ - ret = 1; - } else { - /* Take the max of rx, tx, combined. Drivers return - * the number of channels in different ways. - */ - ret = max(channels.max_rx, channels.max_tx); - ret = max(ret, (int)channels.max_combined); - } - -out: - close(fd); - return ret; -} - -static int xsk_create_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - int max_queues; - int fd; - - max_queues = xsk_get_max_queues(xsk); - if (max_queues < 0) - return max_queues; - - fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, NULL); - if (fd < 0) - return fd; - - ctx->xsks_map_fd = fd; - - return 0; -} - -static void xsk_delete_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - - bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); - close(ctx->xsks_map_fd); -} - -static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) -{ - __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); - __u32 map_len = sizeof(struct bpf_map_info); - struct bpf_prog_info prog_info = {}; - struct xsk_ctx *ctx = xsk->ctx; - struct bpf_map_info map_info; - int fd, err; - - err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); - if (err) - return err; - - num_maps = prog_info.nr_map_ids; - - map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); - if (!map_ids) - return -ENOMEM; - - memset(&prog_info, 0, prog_len); - prog_info.nr_map_ids = num_maps; - prog_info.map_ids = (__u64)(unsigned long)map_ids; - - err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); - if (err) - goto out_map_ids; - - ctx->xsks_map_fd = -1; - - for (i = 0; i < prog_info.nr_map_ids; i++) { - fd = bpf_map_get_fd_by_id(map_ids[i]); - if 
(fd < 0) - continue; - - memset(&map_info, 0, map_len); - err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); - if (err) { - close(fd); - continue; - } - - if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { - ctx->xsks_map_fd = fd; - break; - } - - close(fd); - } - - if (ctx->xsks_map_fd == -1) - err = -ENOENT; - -out_map_ids: - free(map_ids); - return err; -} - -static int xsk_set_bpf_maps(struct xsk_socket *xsk) -{ - struct xsk_ctx *ctx = xsk->ctx; - - return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, - &xsk->fd, 0); -} - -static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) -{ - struct bpf_link_info link_info; - __u32 link_len; - __u32 id = 0; - int err; - int fd; - - while (true) { - err = bpf_link_get_next_id(id, &id); - if (err) { - if (errno == ENOENT) { - err = 0; - break; - } - pr_warn("can't get next link: %s\n", strerror(errno)); - break; - } - - fd = bpf_link_get_fd_by_id(id); - if (fd < 0) { - if (errno == ENOENT) - continue; - pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); - err = -errno; - break; - } - - link_len = sizeof(struct bpf_link_info); - memset(&link_info, 0, link_len); - err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); - if (err) { - pr_warn("can't get link info: %s\n", strerror(errno)); - close(fd); - break; - } - if (link_info.type == BPF_LINK_TYPE_XDP) { - if (link_info.xdp.ifindex == ifindex) { - *link_fd = fd; - if (prog_id) - *prog_id = link_info.prog_id; - break; - } - } - close(fd); - } - - return err; -} - -static bool xsk_probe_bpf_link(void) -{ - LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); - struct bpf_insn insns[2] = { - BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), - BPF_EXIT_INSN() - }; - int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); - int ifindex_lo = 1; - bool ret = false; - int err; - - err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); - if (err) - return ret; - - if (link_fd >= 0) - return true; - - prog_fd = 
bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); - if (prog_fd < 0) - return ret; - - link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); - close(prog_fd); - - if (link_fd >= 0) { - ret = true; - close(link_fd); - } - - return ret; -} - -static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) -{ - char ifname[IFNAMSIZ]; - struct xsk_ctx *ctx; - char *interface; - - ctx = calloc(1, sizeof(*ctx)); - if (!ctx) - return -ENOMEM; - - interface = if_indextoname(ifindex, &ifname[0]); - if (!interface) { - free(ctx); - return -errno; - } - - ctx->ifindex = ifindex; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); - - xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); - - return 0; -} - -static int xsk_init_xdp_res(struct xsk_socket *xsk, - int *xsks_map_fd) -{ - struct xsk_ctx *ctx = xsk->ctx; - int err; - - err = xsk_create_bpf_maps(xsk); - if (err) - return err; - - err = xsk_load_xdp_prog(xsk); - if (err) - goto err_load_xdp_prog; - - if (ctx->has_bpf_link) - err = xsk_create_bpf_link(xsk); - else - err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd, - xsk->config.xdp_flags); - - if (err) - goto err_attach_xdp_prog; - - if (!xsk->rx) - return err; - - err = xsk_set_bpf_maps(xsk); - if (err) - goto err_set_bpf_maps; - - return err; - -err_set_bpf_maps: - if (ctx->has_bpf_link) - close(ctx->link_fd); - else - bpf_set_link_xdp_fd(ctx->ifindex, -1, 0); -err_attach_xdp_prog: - close(ctx->prog_fd); -err_load_xdp_prog: - xsk_delete_bpf_maps(xsk); - return err; -} - -static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) -{ - struct xsk_ctx *ctx = xsk->ctx; - int err; - - ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (ctx->prog_fd < 0) { - err = -errno; - goto err_prog_fd; - } - err = xsk_lookup_bpf_maps(xsk); - if (err) - goto err_lookup_maps; - - if (!xsk->rx) - return err; - - err = xsk_set_bpf_maps(xsk); - if (err) - goto err_set_maps; - - return err; - -err_set_maps: - 
close(ctx->xsks_map_fd); -err_lookup_maps: - close(ctx->prog_fd); -err_prog_fd: - if (ctx->has_bpf_link) - close(ctx->link_fd); - return err; -} - -static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) -{ - struct xsk_socket *xsk = _xdp; - struct xsk_ctx *ctx = xsk->ctx; - __u32 prog_id = 0; - int err; - - if (ctx->has_bpf_link) - err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); - else - err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags); - - if (err) - return err; - - err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) : - xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); - - if (!err && xsks_map_fd) - *xsks_map_fd = ctx->xsks_map_fd; - - return err; -} - -static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, - __u32 queue_id) -{ - struct xsk_ctx *ctx; - - if (list_empty(&umem->ctx_list)) - return NULL; - - list_for_each_entry(ctx, &umem->ctx_list, list) { - if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { - ctx->refcount++; - return ctx; - } - } - - return NULL; -} - -static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) -{ - struct xsk_umem *umem = ctx->umem; - struct xdp_mmap_offsets off; - int err; - - if (--ctx->refcount) - return; - - if (!unmap) - goto out_free; - - err = xsk_get_mmap_offsets(umem->fd, &off); - if (err) - goto out_free; - - munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * - sizeof(__u64)); - munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * - sizeof(__u64)); - -out_free: - list_del(&ctx->list); - free(ctx); -} - -static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, - struct xsk_umem *umem, int ifindex, - const char *ifname, __u32 queue_id, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp) -{ - struct xsk_ctx *ctx; - int err; - - ctx = calloc(1, sizeof(*ctx)); - if (!ctx) - return NULL; - - if (!umem->fill_save) { - err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); - if (err) { - 
free(ctx); - return NULL; - } - } else if (umem->fill_save != fill || umem->comp_save != comp) { - /* Copy over rings to new structs. */ - memcpy(fill, umem->fill_save, sizeof(*fill)); - memcpy(comp, umem->comp_save, sizeof(*comp)); - } - - ctx->ifindex = ifindex; - ctx->refcount = 1; - ctx->umem = umem; - ctx->queue_id = queue_id; - libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); - - ctx->fill = fill; - ctx->comp = comp; - list_add(&ctx->list, &umem->ctx_list); - return ctx; -} - -static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) -{ - free(xsk->ctx); - free(xsk); -} - -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) -{ - xsk->ctx->xsks_map_fd = fd; - return xsk_set_bpf_maps(xsk); -} - -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) -{ - struct xsk_socket *xsk; - int res; - - xsk = calloc(1, sizeof(*xsk)); - if (!xsk) - return -ENOMEM; - - res = xsk_create_xsk_struct(ifindex, xsk); - if (res) { - free(xsk); - return -EINVAL; - } - - res = __xsk_setup_xdp_prog(xsk, xsks_map_fd); - - xsk_destroy_xsk_struct(xsk); - - return res; -} - -int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *usr_config) -{ - bool unmap, rx_setup_done = false, tx_setup_done = false; - void *rx_map = NULL, *tx_map = NULL; - struct sockaddr_xdp sxdp = {}; - struct xdp_mmap_offsets off; - struct xsk_socket *xsk; - struct xsk_ctx *ctx; - int err, ifindex; - - if (!umem || !xsk_ptr || !(rx || tx)) - return -EFAULT; - - unmap = umem->fill_save != fill; - - xsk = calloc(1, sizeof(*xsk)); - if (!xsk) - return -ENOMEM; - - err = xsk_set_xdp_socket_config(&xsk->config, usr_config); - if (err) - goto out_xsk_alloc; - - xsk->outstanding_tx = 0; - ifindex = if_nametoindex(ifname); - if (!ifindex) { - err = -errno; - goto out_xsk_alloc; - } - - if 
(umem->refcount++ > 0) { - xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); - if (xsk->fd < 0) { - err = -errno; - goto out_xsk_alloc; - } - } else { - xsk->fd = umem->fd; - rx_setup_done = umem->rx_ring_setup_done; - tx_setup_done = umem->tx_ring_setup_done; - } - - ctx = xsk_get_ctx(umem, ifindex, queue_id); - if (!ctx) { - if (!fill || !comp) { - err = -EFAULT; - goto out_socket; - } - - ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, - fill, comp); - if (!ctx) { - err = -ENOMEM; - goto out_socket; - } - } - xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); - - if (rx && !rx_setup_done) { - err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, - &xsk->config.rx_size, - sizeof(xsk->config.rx_size)); - if (err) { - err = -errno; - goto out_put_ctx; - } - if (xsk->fd == umem->fd) - umem->rx_ring_setup_done = true; - } - if (tx && !tx_setup_done) { - err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, - &xsk->config.tx_size, - sizeof(xsk->config.tx_size)); - if (err) { - err = -errno; - goto out_put_ctx; - } - if (xsk->fd == umem->fd) - umem->tx_ring_setup_done = true; - } - - err = xsk_get_mmap_offsets(xsk->fd, &off); - if (err) { - err = -errno; - goto out_put_ctx; - } - - if (rx) { - rx_map = mmap(NULL, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_RX_RING); - if (rx_map == MAP_FAILED) { - err = -errno; - goto out_put_ctx; - } - - rx->mask = xsk->config.rx_size - 1; - rx->size = xsk->config.rx_size; - rx->producer = rx_map + off.rx.producer; - rx->consumer = rx_map + off.rx.consumer; - rx->flags = rx_map + off.rx.flags; - rx->ring = rx_map + off.rx.desc; - rx->cached_prod = *rx->producer; - rx->cached_cons = *rx->consumer; - } - xsk->rx = rx; - - if (tx) { - tx_map = mmap(NULL, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, - xsk->fd, XDP_PGOFF_TX_RING); - if (tx_map == MAP_FAILED) { - 
err = -errno; - goto out_mmap_rx; - } - - tx->mask = xsk->config.tx_size - 1; - tx->size = xsk->config.tx_size; - tx->producer = tx_map + off.tx.producer; - tx->consumer = tx_map + off.tx.consumer; - tx->flags = tx_map + off.tx.flags; - tx->ring = tx_map + off.tx.desc; - tx->cached_prod = *tx->producer; - /* cached_cons is r->size bigger than the real consumer pointer - * See xsk_prod_nb_free - */ - tx->cached_cons = *tx->consumer + xsk->config.tx_size; - } - xsk->tx = tx; - - sxdp.sxdp_family = PF_XDP; - sxdp.sxdp_ifindex = ctx->ifindex; - sxdp.sxdp_queue_id = ctx->queue_id; - if (umem->refcount > 1) { - sxdp.sxdp_flags |= XDP_SHARED_UMEM; - sxdp.sxdp_shared_umem_fd = umem->fd; - } else { - sxdp.sxdp_flags = xsk->config.bind_flags; - } - - err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); - if (err) { - err = -errno; - goto out_mmap_tx; - } - - ctx->prog_fd = -1; - - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { - err = __xsk_setup_xdp_prog(xsk, NULL); - if (err) - goto out_mmap_tx; - } - - *xsk_ptr = xsk; - umem->fill_save = NULL; - umem->comp_save = NULL; - return 0; - -out_mmap_tx: - if (tx) - munmap(tx_map, off.tx.desc + - xsk->config.tx_size * sizeof(struct xdp_desc)); -out_mmap_rx: - if (rx) - munmap(rx_map, off.rx.desc + - xsk->config.rx_size * sizeof(struct xdp_desc)); -out_put_ctx: - xsk_put_ctx(ctx, unmap); -out_socket: - if (--umem->refcount) - close(xsk->fd); -out_xsk_alloc: - free(xsk); - return err; -} - -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, - const struct xsk_socket_config *usr_config) -{ - if (!umem) - return -EFAULT; - - return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, - rx, tx, umem->fill_save, - umem->comp_save, usr_config); -} - -int xsk_umem__delete(struct xsk_umem *umem) -{ - struct xdp_mmap_offsets off; - int err; - - if (!umem) - return 0; - - if 
(umem->refcount) - return -EBUSY; - - err = xsk_get_mmap_offsets(umem->fd, &off); - if (!err && umem->fill_save && umem->comp_save) { - munmap(umem->fill_save->ring - off.fr.desc, - off.fr.desc + umem->config.fill_size * sizeof(__u64)); - munmap(umem->comp_save->ring - off.cr.desc, - off.cr.desc + umem->config.comp_size * sizeof(__u64)); - } - - close(umem->fd); - free(umem); - - return 0; -} - -void xsk_socket__delete(struct xsk_socket *xsk) -{ - size_t desc_sz = sizeof(struct xdp_desc); - struct xdp_mmap_offsets off; - struct xsk_umem *umem; - struct xsk_ctx *ctx; - int err; - - if (!xsk) - return; - - ctx = xsk->ctx; - umem = ctx->umem; - if (ctx->prog_fd != -1) { - xsk_delete_bpf_maps(xsk); - close(ctx->prog_fd); - if (ctx->has_bpf_link) - close(ctx->link_fd); - } - - err = xsk_get_mmap_offsets(xsk->fd, &off); - if (!err) { - if (xsk->rx) { - munmap(xsk->rx->ring - off.rx.desc, - off.rx.desc + xsk->config.rx_size * desc_sz); - } - if (xsk->tx) { - munmap(xsk->tx->ring - off.tx.desc, - off.tx.desc + xsk->config.tx_size * desc_sz); - } - } - - xsk_put_ctx(ctx, true); - - umem->refcount--; - /* Do not close an fd that also has an associated umem connected - * to it. - */ - if (xsk->fd != umem->fd) - close(xsk->fd); - free(xsk); -} diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h deleted file mode 100644 index 64e9c57fd792..000000000000 --- a/tools/lib/bpf/xsk.h +++ /dev/null @@ -1,336 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ - -/* - * AF_XDP user-space access library. - * - * Copyright (c) 2018 - 2019 Intel Corporation. - * Copyright (c) 2019 Facebook - * - * Author(s): Magnus Karlsson - */ - -#ifndef __LIBBPF_XSK_H -#define __LIBBPF_XSK_H - -#include -#include -#include -#include - -#include "libbpf.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* This whole API has been deprecated and moved to libxdp that can be found at - * https://github.com/xdp-project/xdp-tools. 
The APIs are exactly the same so - * it should just be linking with libxdp instead of libbpf for this set of - * functionality. If not, please submit a bug report on the aforementioned page. - */ - -/* Load-Acquire Store-Release barriers used by the XDP socket - * library. The following macros should *NOT* be considered part of - * the xsk.h API, and is subject to change anytime. - * - * LIBRARY INTERNAL - */ - -#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x) -#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) - -#if defined(__i386__) || defined(__x86_64__) -# define libbpf_smp_store_release(p, v) \ - do { \ - asm volatile("" : : : "memory"); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - asm volatile("" : : : "memory"); \ - ___p1; \ - }) -#elif defined(__aarch64__) -# define libbpf_smp_store_release(p, v) \ - asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory") -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1; \ - asm volatile ("ldar %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ - ___p1; \ - }) -#elif defined(__riscv) -# define libbpf_smp_store_release(p, v) \ - do { \ - asm volatile ("fence rw,w" : : : "memory"); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -# define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - asm volatile ("fence r,rw" : : : "memory"); \ - ___p1; \ - }) -#endif - -#ifndef libbpf_smp_store_release -#define libbpf_smp_store_release(p, v) \ - do { \ - __sync_synchronize(); \ - __XSK_WRITE_ONCE(*p, v); \ - } while (0) -#endif - -#ifndef libbpf_smp_load_acquire -#define libbpf_smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ - __sync_synchronize(); \ - ___p1; \ - }) -#endif - -/* LIBRARY INTERNAL -- END */ - -/* Do not access these members directly. Use the functions below. 
*/ -#define DEFINE_XSK_RING(name) \ -struct name { \ - __u32 cached_prod; \ - __u32 cached_cons; \ - __u32 mask; \ - __u32 size; \ - __u32 *producer; \ - __u32 *consumer; \ - void *ring; \ - __u32 *flags; \ -} - -DEFINE_XSK_RING(xsk_ring_prod); -DEFINE_XSK_RING(xsk_ring_cons); - -/* For a detailed explanation on the memory barriers associated with the - * ring, please take a look at net/xdp/xsk_queue.h. - */ - -struct xsk_umem; -struct xsk_socket; - -static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, - __u32 idx) -{ - __u64 *addrs = (__u64 *)fill->ring; - - return &addrs[idx & fill->mask]; -} - -static inline const __u64 * -xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) -{ - const __u64 *addrs = (const __u64 *)comp->ring; - - return &addrs[idx & comp->mask]; -} - -static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, - __u32 idx) -{ - struct xdp_desc *descs = (struct xdp_desc *)tx->ring; - - return &descs[idx & tx->mask]; -} - -static inline const struct xdp_desc * -xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) -{ - const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; - - return &descs[idx & rx->mask]; -} - -static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) -{ - return *r->flags & XDP_RING_NEED_WAKEUP; -} - -static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) -{ - __u32 free_entries = r->cached_cons - r->cached_prod; - - if (free_entries >= nb) - return free_entries; - - /* Refresh the local tail pointer. - * cached_cons is r->size bigger than the real consumer pointer so - * that this addition can be avoided in the more frequently - * executed code that computs free_entries in the beginning of - * this function. Without this optimization it whould have been - * free_entries = r->cached_prod - r->cached_cons + r->size. 
- */ - r->cached_cons = libbpf_smp_load_acquire(r->consumer); - r->cached_cons += r->size; - - return r->cached_cons - r->cached_prod; -} - -static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) -{ - __u32 entries = r->cached_prod - r->cached_cons; - - if (entries == 0) { - r->cached_prod = libbpf_smp_load_acquire(r->producer); - entries = r->cached_prod - r->cached_cons; - } - - return (entries > nb) ? nb : entries; -} - -static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx) -{ - if (xsk_prod_nb_free(prod, nb) < nb) - return 0; - - *idx = prod->cached_prod; - prod->cached_prod += nb; - - return nb; -} - -static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) -{ - /* Make sure everything has been written to the ring before indicating - * this to the kernel by writing the producer pointer. - */ - libbpf_smp_store_release(prod->producer, *prod->producer + nb); -} - -static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) -{ - __u32 entries = xsk_cons_nb_avail(cons, nb); - - if (entries > 0) { - *idx = cons->cached_cons; - cons->cached_cons += entries; - } - - return entries; -} - -static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb) -{ - cons->cached_cons -= nb; -} - -static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) -{ - /* Make sure data has been read before indicating we are done - * with the entries by updating the consumer pointer. 
- */ - libbpf_smp_store_release(cons->consumer, *cons->consumer + nb); - -} - -static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) -{ - return &((char *)umem_area)[addr]; -} - -static inline __u64 xsk_umem__extract_addr(__u64 addr) -{ - return addr & XSK_UNALIGNED_BUF_ADDR_MASK; -} - -static inline __u64 xsk_umem__extract_offset(__u64 addr) -{ - return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; -} - -static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) -{ - return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); -} - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__fd(const struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__fd(const struct xsk_socket *xsk); - -#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 -#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 -#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ -#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) -#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 -#define XSK_UMEM__DEFAULT_FLAGS 0 - -struct xsk_umem_config { - __u32 fill_size; - __u32 comp_size; - __u32 frame_size; - __u32 frame_headroom; - __u32 flags; -}; - -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); - -/* Flags for the libbpf_flags field. */ -#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) - -struct xsk_socket_config { - __u32 rx_size; - __u32 tx_size; - __u32 libbpf_flags; - __u32 xdp_flags; - __u16 bind_flags; -}; - -/* Set config to NULL to get the default configuration. 
*/ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_2(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__create_v0_0_4(struct xsk_umem **umem, - void *umem_area, __u64 size, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_umem_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__create(struct xsk_socket **xsk, - const char *ifname, __u32 queue_id, - struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - const struct xsk_socket_config *config); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, - const char *ifname, - __u32 queue_id, struct xsk_umem *umem, - struct xsk_ring_cons *rx, - struct xsk_ring_prod *tx, - struct xsk_ring_prod *fill, - struct xsk_ring_cons *comp, - const struct xsk_socket_config *config); - -/* Returns 0 for success and -EBUSY if the umem is still in use. 
*/ -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -int xsk_umem__delete(struct xsk_umem *umem); -LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp") -void xsk_socket__delete(struct xsk_socket *xsk); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* __LIBBPF_XSK_H */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4fbd88a8ed9e..e32a28fe8bc1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -230,6 +230,8 @@ $(OUTPUT)/xdping: $(TESTING_HELPERS) $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) +$(OUTPUT)/xsk.o: $(BPFOBJ) +$(OUTPUT)/xdpxceiver: $(OUTPUT)/xsk.o BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index e5992a6b5e09..019c567b6b4e 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -97,7 +97,7 @@ #include #include #include -#include +#include "xsk.h" #include "xdpxceiver.h" #include "../kselftest.h" diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c new file mode 100644 index 000000000000..eb50c3f336f8 --- /dev/null +++ b/tools/testing/selftests/bpf/xsk.c @@ -0,0 +1,1264 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. 
+ * + * Author(s): Magnus Karlsson + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "xsk.h" + +#ifndef SOL_XDP + #define SOL_XDP 283 +#endif + +#ifndef AF_XDP + #define AF_XDP 44 +#endif + +#ifndef PF_XDP + #define PF_XDP AF_XDP +#endif + +#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) + +enum xsk_prog { + XSK_PROG_FALLBACK, + XSK_PROG_REDIRECT_FLAGS, +}; + +struct xsk_umem { + struct xsk_ring_prod *fill_save; + struct xsk_ring_cons *comp_save; + char *umem_area; + struct xsk_umem_config config; + int fd; + int refcount; + struct list_head ctx_list; + bool rx_ring_setup_done; + bool tx_ring_setup_done; +}; + +struct xsk_ctx { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + __u32 queue_id; + struct xsk_umem *umem; + int refcount; + int ifindex; + struct list_head list; + int prog_fd; + int link_fd; + int xsks_map_fd; + char ifname[IFNAMSIZ]; + bool has_bpf_link; +}; + +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + __u64 outstanding_tx; + struct xsk_ctx *ctx; + struct xsk_socket_config config; + int fd; +}; + +struct xsk_nl_info { + bool xdp_prog_attached; + int ifindex; + int fd; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + +int xsk_umem__fd(const struct xsk_umem *umem) +{ + return umem ? umem->fd : -EINVAL; +} + +int xsk_socket__fd(const struct xsk_socket *xsk) +{ + return xsk ? 
xsk->fd : -EINVAL; +} + +static bool xsk_page_aligned(void *buffer) +{ + unsigned long addr = (unsigned long)buffer; + + return !(addr & (getpagesize() - 1)); +} + +static void xsk_set_umem_config(struct xsk_umem_config *cfg, + const struct xsk_umem_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + return; + } + + cfg->fill_size = usr_cfg->fill_size; + cfg->comp_size = usr_cfg->comp_size; + cfg->frame_size = usr_cfg->frame_size; + cfg->frame_headroom = usr_cfg->frame_headroom; + cfg->flags = usr_cfg->flags; +} + +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->libbpf_flags = 0; + cfg->xdp_flags = 0; + cfg->bind_flags = 0; + return 0; + } + + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + + cfg->rx_size = usr_cfg->rx_size; + cfg->tx_size = usr_cfg->tx_size; + cfg->libbpf_flags = usr_cfg->libbpf_flags; + cfg->xdp_flags = usr_cfg->xdp_flags; + cfg->bind_flags = usr_cfg->bind_flags; + + return 0; +} + +static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) +{ + struct xdp_mmap_offsets_v1 off_v1; + + /* getsockopt on a kernel <= 5.3 has no flags fields. + * Copy over the offsets to the correct places in the >=5.4 format + * and put the flags where they would have been on that kernel. 
+ */ + memcpy(&off_v1, off, sizeof(off_v1)); + + off->rx.producer = off_v1.rx.producer; + off->rx.consumer = off_v1.rx.consumer; + off->rx.desc = off_v1.rx.desc; + off->rx.flags = off_v1.rx.consumer + sizeof(__u32); + + off->tx.producer = off_v1.tx.producer; + off->tx.consumer = off_v1.tx.consumer; + off->tx.desc = off_v1.tx.desc; + off->tx.flags = off_v1.tx.consumer + sizeof(__u32); + + off->fr.producer = off_v1.fr.producer; + off->fr.consumer = off_v1.fr.consumer; + off->fr.desc = off_v1.fr.desc; + off->fr.flags = off_v1.fr.consumer + sizeof(__u32); + + off->cr.producer = off_v1.cr.producer; + off->cr.consumer = off_v1.cr.consumer; + off->cr.desc = off_v1.cr.desc; + off->cr.flags = off_v1.cr.consumer + sizeof(__u32); +} + +static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) +{ + socklen_t optlen; + int err; + + optlen = sizeof(*off); + err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); + if (err) + return err; + + if (optlen == sizeof(*off)) + return 0; + + if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { + xsk_mmap_offsets_v1(off); + return 0; + } + + return -EINVAL; +} + +static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xdp_mmap_offsets off; + void *map; + int err; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) + return -errno; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) + return -errno; + + err = xsk_get_mmap_offsets(fd, &off); + if (err) + return -errno; + + map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) + return -errno; + + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + 
off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; + comp->ring = map + off.cr.desc; + + return 0; + +out_mmap: + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); + return err; +} + +int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xdp_umem_reg mr; + struct xsk_umem *umem; + int err; + + if (!umem_area || !umem_ptr || !fill || !comp) + return -EFAULT; + if (!size && !xsk_page_aligned(umem_area)) + return -EINVAL; + + umem = calloc(1, sizeof(*umem)); + if (!umem) + return -ENOMEM; + + umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); + if (umem->fd < 0) { + err = -errno; + goto out_umem_alloc; + } + + umem->umem_area = umem_area; + INIT_LIST_HEAD(&umem->ctx_list); + xsk_set_umem_config(&umem->config, usr_config); + + memset(&mr, 0, sizeof(mr)); + mr.addr = (uintptr_t)umem_area; + mr.len = size; + mr.chunk_size = umem->config.frame_size; + mr.headroom = umem->config.frame_headroom; + mr.flags = umem->config.flags; + + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); + if (err) { + err = -errno; + goto out_socket; + } + + err = xsk_create_umem_rings(umem, umem->fd, fill, comp); + if (err) + goto out_socket; + + umem->fill_save = fill; + umem->comp_save = comp; + *umem_ptr = umem; + return 0; + +out_socket: + close(umem->fd); 
+out_umem_alloc: + free(umem); + return err; +} + +struct xsk_umem_config_v1 { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +static enum xsk_prog get_xsk_prog(void) +{ + enum xsk_prog detected = XSK_PROG_FALLBACK; + char data_in = 0, data_out; + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data_in, + .data_size_in = 1, + .data_out = &data_out, + ); + + int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); + + map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); + if (map_fd < 0) + return detected; + + insns[0].imm = map_fd; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + if (prog_fd < 0) { + close(map_fd); + return detected; + } + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ret && opts.retval == XDP_PASS) + detected = XSK_PROG_REDIRECT_FLAGS; + close(prog_fd); + close(map_fd); + return detected; +} + +static int xsk_load_xdp_prog(struct xsk_socket *xsk) +{ + static const int log_buf_size = 16 * 1024; + struct xsk_ctx *ctx = xsk->ctx; + char log_buf[log_buf_size]; + int prog_fd; + + /* This is the fallback C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * int ret, index = ctx->rx_queue_index; + * + * // A set entry here means that the correspnding queue_id + * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * if (ret > 0) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. 
+ * if (bpf_map_lookup_elem(&xsks_map, &index)) + * return bpf_redirect_map(&xsks_map, index, 0); + * return XDP_PASS; + * } + */ + struct bpf_insn prog[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* if w0 != 0 goto pc+13 */ + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), + /* r2 = r10 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* call bpf_map_lookup_elem */ + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* r2 = *(u32 *)(r10 - 4) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* The jumps are to this instruction */ + BPF_EXIT_INSN(), + }; + + /* This is the post-5.3 kernel C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); + * } + */ + struct bpf_insn prog_redirect_flags[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + size_t insns_cnt[] = {ARRAY_SIZE(prog), + ARRAY_SIZE(prog_redirect_flags), + }; + struct bpf_insn *progs[] = {prog, 
prog_redirect_flags}; + enum xsk_prog option = get_xsk_prog(); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_buf = log_buf, + .log_size = log_buf_size, + ); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", + progs[option], insns_cnt[option], &opts); + if (prog_fd < 0) { + pr_warn("BPF log buffer:\n%s", log_buf); + return prog_fd; + } + + ctx->prog_fd = prog_fd; + return 0; +} + +static int xsk_create_bpf_link(struct xsk_socket *xsk) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int link_fd; + int err; + + err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); + if (err) { + pr_warn("getting XDP prog id failed\n"); + return err; + } + + /* if there's a netlink-based XDP prog loaded on interface, bail out + * and ask user to do the removal by himself + */ + if (prog_id) { + pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); + return -EINVAL; + } + + opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); + + link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); + if (link_fd < 0) { + pr_warn("bpf_link_create failed: %s\n", strerror(errno)); + return link_fd; + } + + ctx->link_fd = link_fd; + return 0; +} + +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. 
+ */ +static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + +static int xsk_get_max_queues(struct xsk_socket *xsk) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + struct xsk_ctx *ctx = xsk->ctx; + struct ifreq ifr = {}; + int fd, err, ret; + + fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + ifr.ifr_data = (void *)&channels; + libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; + goto out; + } + + if (err) { + /* If the device says it has no channels, then all traffic + * is sent to a single stream, so max queues = 1. + */ + ret = 1; + } else { + /* Take the max of rx, tx, combined. Drivers return + * the number of channels in different ways. + */ + ret = max(channels.max_rx, channels.max_tx); + ret = max(ret, (int)channels.max_combined); + } + +out: + close(fd); + return ret; +} + +static int xsk_create_bpf_maps(struct xsk_socket *xsk) +{ + struct xsk_ctx *ctx = xsk->ctx; + int max_queues; + int fd; + + max_queues = xsk_get_max_queues(xsk); + if (max_queues < 0) + return max_queues; + + fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, NULL); + if (fd < 0) + return fd; + + ctx->xsks_map_fd = fd; + + return 0; +} + +static void xsk_delete_bpf_maps(struct xsk_socket *xsk) +{ + struct xsk_ctx *ctx = xsk->ctx; + + bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); + close(ctx->xsks_map_fd); +} + +static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) +{ + __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); + __u32 map_len = sizeof(struct bpf_map_info); + struct bpf_prog_info prog_info = {}; + struct xsk_ctx *ctx = xsk->ctx; + struct bpf_map_info map_info; + int fd, err; + + err = 
bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); + if (err) + return err; + + num_maps = prog_info.nr_map_ids; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); + if (!map_ids) + return -ENOMEM; + + memset(&prog_info, 0, prog_len); + prog_info.nr_map_ids = num_maps; + prog_info.map_ids = (__u64)(unsigned long)map_ids; + + err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); + if (err) + goto out_map_ids; + + ctx->xsks_map_fd = -1; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) + continue; + + memset(&map_info, 0, map_len); + err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (err) { + close(fd); + continue; + } + + if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { + ctx->xsks_map_fd = fd; + break; + } + + close(fd); + } + + if (ctx->xsks_map_fd == -1) + err = -ENOENT; + +out_map_ids: + free(map_ids); + return err; +} + +static int xsk_set_bpf_maps(struct xsk_socket *xsk) +{ + struct xsk_ctx *ctx = xsk->ctx; + + return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, + &xsk->fd, 0); +} + +static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) +{ + struct bpf_link_info link_info; + __u32 link_len; + __u32 id = 0; + int err; + int fd; + + while (true) { + err = bpf_link_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + pr_warn("can't get next link: %s\n", strerror(errno)); + break; + } + + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); + err = -errno; + break; + } + + link_len = sizeof(struct bpf_link_info); + memset(&link_info, 0, link_len); + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); + if (err) { + pr_warn("can't get link info: %s\n", strerror(errno)); + close(fd); + break; + } + if (link_info.type == BPF_LINK_TYPE_XDP) { + if (link_info.xdp.ifindex == ifindex) { + *link_fd = 
fd; + if (prog_id) + *prog_id = link_info.prog_id; + break; + } + } + close(fd); + } + + return err; +} + +static bool xsk_probe_bpf_link(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), + BPF_EXIT_INSN() + }; + int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); + int ifindex_lo = 1; + bool ret = false; + int err; + + err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); + if (err) + return ret; + + if (link_fd >= 0) + return true; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + if (prog_fd < 0) + return ret; + + link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); + close(prog_fd); + + if (link_fd >= 0) { + ret = true; + close(link_fd); + } + + return ret; +} + +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) +{ + char ifname[IFNAMSIZ]; + struct xsk_ctx *ctx; + char *interface; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return -ENOMEM; + + interface = if_indextoname(ifindex, &ifname[0]); + if (!interface) { + free(ctx); + return -errno; + } + + ctx->ifindex = ifindex; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + + xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); + + return 0; +} + +static int xsk_init_xdp_res(struct xsk_socket *xsk, + int *xsks_map_fd) +{ + struct xsk_ctx *ctx = xsk->ctx; + int err; + + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) + goto err_load_xdp_prog; + + if (ctx->has_bpf_link) + err = xsk_create_bpf_link(xsk); + else + err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd, + xsk->config.xdp_flags, NULL); + + if (err) + goto err_attach_xdp_prog; + + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_bpf_maps; + + return err; + +err_set_bpf_maps: + if (ctx->has_bpf_link) + close(ctx->link_fd); + else + bpf_xdp_detach(ctx->ifindex, 0, NULL); 
+err_attach_xdp_prog: + close(ctx->prog_fd); +err_load_xdp_prog: + xsk_delete_bpf_maps(xsk); + return err; +} + +static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) +{ + struct xsk_ctx *ctx = xsk->ctx; + int err; + + ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (ctx->prog_fd < 0) { + err = -errno; + goto err_prog_fd; + } + err = xsk_lookup_bpf_maps(xsk); + if (err) + goto err_lookup_maps; + + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_maps; + + return err; + +err_set_maps: + close(ctx->xsks_map_fd); +err_lookup_maps: + close(ctx->prog_fd); +err_prog_fd: + if (ctx->has_bpf_link) + close(ctx->link_fd); + return err; +} + +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) +{ + struct xsk_socket *xsk = _xdp; + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int err; + + if (ctx->has_bpf_link) + err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); + else + err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); + + if (err) + return err; + + err = !prog_id ? 
xsk_init_xdp_res(xsk, xsks_map_fd) : + xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); + + if (!err && xsks_map_fd) + *xsks_map_fd = ctx->xsks_map_fd; + + return err; +} + +static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, + __u32 queue_id) +{ + struct xsk_ctx *ctx; + + if (list_empty(&umem->ctx_list)) + return NULL; + + list_for_each_entry(ctx, &umem->ctx_list, list) { + if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { + ctx->refcount++; + return ctx; + } + } + + return NULL; +} + +static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) +{ + struct xsk_umem *umem = ctx->umem; + struct xdp_mmap_offsets off; + int err; + + if (--ctx->refcount) + return; + + if (!unmap) + goto out_free; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (err) + goto out_free; + + munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * + sizeof(__u64)); + munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * + sizeof(__u64)); + +out_free: + list_del(&ctx->list); + free(ctx); +} + +static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, + struct xsk_umem *umem, int ifindex, + const char *ifname, __u32 queue_id, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xsk_ctx *ctx; + int err; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + if (!umem->fill_save) { + err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); + if (err) { + free(ctx); + return NULL; + } + } else if (umem->fill_save != fill || umem->comp_save != comp) { + /* Copy over rings to new structs. 
*/ + memcpy(fill, umem->fill_save, sizeof(*fill)); + memcpy(comp, umem->comp_save, sizeof(*comp)); + } + + ctx->ifindex = ifindex; + ctx->refcount = 1; + ctx->umem = umem; + ctx->queue_id = queue_id; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + + ctx->fill = fill; + ctx->comp = comp; + list_add(&ctx->list, &umem->ctx_list); + return ctx; +} + +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) +{ + free(xsk->ctx); + free(xsk); +} + +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) +{ + xsk->ctx->xsks_map_fd = fd; + return xsk_set_bpf_maps(xsk); +} + +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) +{ + struct xsk_socket *xsk; + int res; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + res = xsk_create_xsk_struct(ifindex, xsk); + if (res) { + free(xsk); + return -EINVAL; + } + + res = __xsk_setup_xdp_prog(xsk, xsks_map_fd); + + xsk_destroy_xsk_struct(xsk); + + return res; +} + +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *usr_config) +{ + bool unmap, rx_setup_done = false, tx_setup_done = false; + void *rx_map = NULL, *tx_map = NULL; + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + struct xsk_ctx *ctx; + int err, ifindex; + + if (!umem || !xsk_ptr || !(rx || tx)) + return -EFAULT; + + unmap = umem->fill_save != fill; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_xsk_alloc; + + xsk->outstanding_tx = 0; + ifindex = if_nametoindex(ifname); + if (!ifindex) { + err = -errno; + goto out_xsk_alloc; + } + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } 
else { + xsk->fd = umem->fd; + rx_setup_done = umem->rx_ring_setup_done; + tx_setup_done = umem->tx_ring_setup_done; + } + + ctx = xsk_get_ctx(umem, ifindex, queue_id); + if (!ctx) { + if (!fill || !comp) { + err = -EFAULT; + goto out_socket; + } + + ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, + fill, comp); + if (!ctx) { + err = -ENOMEM; + goto out_socket; + } + } + xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); + + if (rx && !rx_setup_done) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_put_ctx; + } + if (xsk->fd == umem->fd) + umem->rx_ring_setup_done = true; + } + if (tx && !tx_setup_done) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_put_ctx; + } + if (xsk->fd == umem->fd) + umem->tx_ring_setup_done = true; + } + + err = xsk_get_mmap_offsets(xsk->fd, &off); + if (err) { + err = -errno; + goto out_put_ctx; + } + + if (rx) { + rx_map = mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (rx_map == MAP_FAILED) { + err = -errno; + goto out_put_ctx; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = rx_map + off.rx.producer; + rx->consumer = rx_map + off.rx.consumer; + rx->flags = rx_map + off.rx.flags; + rx->ring = rx_map + off.rx.desc; + rx->cached_prod = *rx->producer; + rx->cached_cons = *rx->consumer; + } + xsk->rx = rx; + + if (tx) { + tx_map = mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (tx_map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = tx_map + 
off.tx.producer; + tx->consumer = tx_map + off.tx.consumer; + tx->flags = tx_map + off.tx.flags; + tx->ring = tx_map + off.tx.desc; + tx->cached_prod = *tx->producer; + /* cached_cons is r->size bigger than the real consumer pointer + * See xsk_prod_nb_free + */ + tx->cached_cons = *tx->consumer + xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = ctx->ifindex; + sxdp.sxdp_queue_id = ctx->queue_id; + if (umem->refcount > 1) { + sxdp.sxdp_flags |= XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = xsk->config.bind_flags; + } + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + ctx->prog_fd = -1; + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = __xsk_setup_xdp_prog(xsk, NULL); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + umem->fill_save = NULL; + umem->comp_save = NULL; + return 0; + +out_mmap_tx: + if (tx) + munmap(tx_map, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(rx_map, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_put_ctx: + xsk_put_ctx(ctx, unmap); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + if (!umem) + return -EFAULT; + + return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, + rx, tx, umem->fill_save, + umem->comp_save, usr_config); +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err && umem->fill_save && umem->comp_save) { + 
munmap(umem->fill_save->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp_save->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + size_t desc_sz = sizeof(struct xdp_desc); + struct xdp_mmap_offsets off; + struct xsk_umem *umem; + struct xsk_ctx *ctx; + int err; + + if (!xsk) + return; + + ctx = xsk->ctx; + umem = ctx->umem; + if (ctx->prog_fd != -1) { + xsk_delete_bpf_maps(xsk); + close(ctx->prog_fd); + if (ctx->has_bpf_link) + close(ctx->link_fd); + } + + err = xsk_get_mmap_offsets(xsk->fd, &off); + if (!err) { + if (xsk->rx) { + munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + xsk->config.tx_size * desc_sz); + } + } + + xsk_put_ctx(ctx, true); + + umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != umem->fd) + close(xsk->fd); + free(xsk); +} diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h new file mode 100644 index 000000000000..915e7135337c --- /dev/null +++ b/tools/testing/selftests/bpf/xsk.h @@ -0,0 +1,315 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * AF_XDP user-space access library. + * + * Copyright (c) 2018 - 2019 Intel Corporation. + * Copyright (c) 2019 Facebook + * + * Author(s): Magnus Karlsson + */ + +#ifndef __XSK_H +#define __XSK_H + +#include +#include +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* This whole API has been deprecated and moved to libxdp that can be found at + * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so + * it should just be linking with libxdp instead of libbpf for this set of + * functionality. 
If not, please submit a bug report on the aforementioned page. + */ + +/* Load-Acquire Store-Release barriers used by the XDP socket + * library. The following macros should *NOT* be considered part of + * the xsk.h API, and is subject to change anytime. + * + * LIBRARY INTERNAL + */ + +#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x) +#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) + +#if defined(__i386__) || defined(__x86_64__) +# define libbpf_smp_store_release(p, v) \ + do { \ + asm volatile("" : : : "memory"); \ + __XSK_WRITE_ONCE(*p, v); \ + } while (0) +# define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ + asm volatile("" : : : "memory"); \ + ___p1; \ + }) +#elif defined(__aarch64__) +# define libbpf_smp_store_release(p, v) \ + asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory") +# define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1; \ + asm volatile ("ldar %w0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + ___p1; \ + }) +#elif defined(__riscv) +# define libbpf_smp_store_release(p, v) \ + do { \ + asm volatile ("fence rw,w" : : : "memory"); \ + __XSK_WRITE_ONCE(*p, v); \ + } while (0) +# define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ + asm volatile ("fence r,rw" : : : "memory"); \ + ___p1; \ + }) +#endif + +#ifndef libbpf_smp_store_release +#define libbpf_smp_store_release(p, v) \ + do { \ + __sync_synchronize(); \ + __XSK_WRITE_ONCE(*p, v); \ + } while (0) +#endif + +#ifndef libbpf_smp_load_acquire +#define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ + __sync_synchronize(); \ + ___p1; \ + }) +#endif + +/* LIBRARY INTERNAL -- END */ + +/* Do not access these members directly. Use the functions below. 
*/ +#define DEFINE_XSK_RING(name) \ +struct name { \ + __u32 cached_prod; \ + __u32 cached_cons; \ + __u32 mask; \ + __u32 size; \ + __u32 *producer; \ + __u32 *consumer; \ + void *ring; \ + __u32 *flags; \ +} + +DEFINE_XSK_RING(xsk_ring_prod); +DEFINE_XSK_RING(xsk_ring_cons); + +/* For a detailed explanation on the memory barriers associated with the + * ring, please take a look at net/xdp/xsk_queue.h. + */ + +struct xsk_umem; +struct xsk_socket; + +static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, + __u32 idx) +{ + __u64 *addrs = (__u64 *)fill->ring; + + return &addrs[idx & fill->mask]; +} + +static inline const __u64 * +xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) +{ + const __u64 *addrs = (const __u64 *)comp->ring; + + return &addrs[idx & comp->mask]; +} + +static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, + __u32 idx) +{ + struct xdp_desc *descs = (struct xdp_desc *)tx->ring; + + return &descs[idx & tx->mask]; +} + +static inline const struct xdp_desc * +xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) +{ + const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; + + return &descs[idx & rx->mask]; +} + +static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) +{ + return *r->flags & XDP_RING_NEED_WAKEUP; +} + +static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) +{ + __u32 free_entries = r->cached_cons - r->cached_prod; + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer. + * cached_cons is r->size bigger than the real consumer pointer so + * that this addition can be avoided in the more frequently + * executed code that computes free_entries in the beginning of + * this function. Without this optimization it would have been + * free_entries = r->cached_prod - r->cached_cons + r->size.
+ */ + r->cached_cons = libbpf_smp_load_acquire(r->consumer); + r->cached_cons += r->size; + + return r->cached_cons - r->cached_prod; +} + +static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) +{ + __u32 entries = r->cached_prod - r->cached_cons; + + if (entries == 0) { + r->cached_prod = libbpf_smp_load_acquire(r->producer); + entries = r->cached_prod - r->cached_cons; + } + + return (entries > nb) ? nb : entries; +} + +static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx) +{ + if (xsk_prod_nb_free(prod, nb) < nb) + return 0; + + *idx = prod->cached_prod; + prod->cached_prod += nb; + + return nb; +} + +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) +{ + /* Make sure everything has been written to the ring before indicating + * this to the kernel by writing the producer pointer. + */ + libbpf_smp_store_release(prod->producer, *prod->producer + nb); +} + +static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) +{ + __u32 entries = xsk_cons_nb_avail(cons, nb); + + if (entries > 0) { + *idx = cons->cached_cons; + cons->cached_cons += entries; + } + + return entries; +} + +static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb) +{ + cons->cached_cons -= nb; +} + +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) +{ + /* Make sure data has been read before indicating we are done + * with the entries by updating the consumer pointer. 
+ */ + libbpf_smp_store_release(cons->consumer, *cons->consumer + nb); + +} + +static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) +{ + return &((char *)umem_area)[addr]; +} + +static inline __u64 xsk_umem__extract_addr(__u64 addr) +{ + return addr & XSK_UNALIGNED_BUF_ADDR_MASK; +} + +static inline __u64 xsk_umem__extract_offset(__u64 addr) +{ + return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; +} + +static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) +{ + return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); +} + +int xsk_umem__fd(const struct xsk_umem *umem); +int xsk_socket__fd(const struct xsk_socket *xsk); + +#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 +#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) +#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 +#define XSK_UMEM__DEFAULT_FLAGS 0 + +struct xsk_umem_config { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; + __u32 flags; +}; + +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); + +/* Flags for the libbpf_flags field. */ +#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) + +struct xsk_socket_config { + __u32 rx_size; + __u32 tx_size; + __u32 libbpf_flags; + __u32 xdp_flags; + __u16 bind_flags; +}; + +/* Set config to NULL to get the default configuration. 
*/ +int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); + +/* Returns 0 for success and -EBUSY if the umem is still in use. */ +int xsk_umem__delete(struct xsk_umem *umem); +void xsk_socket__delete(struct xsk_socket *xsk); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __XSK_H */ -- cgit v1.2.3-59-g8ed1b From 31e42721976b9c445477038f8a4006150cd27a60 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 27 Jun 2022 14:15:25 -0700 Subject: selftests/bpf: remove last tests with legacy BPF map definitions Libbpf 1.0 stops supporting legacy-style BPF map definitions. Selftests have been migrated away from using legacy BPF map definitions except for two selftests, to make sure that legacy functionality still worked in pre-1.0 libbpf. Now it's time to let those tests go as libbpf 1.0 is imminent.
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220627211527.2245459-14-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_legacy.h | 9 ---- tools/testing/selftests/bpf/prog_tests/btf.c | 1 - tools/testing/selftests/bpf/progs/test_btf_haskv.c | 51 ---------------------- tools/testing/selftests/bpf/progs/test_btf_newkv.c | 18 -------- 4 files changed, 79 deletions(-) delete mode 100644 tools/testing/selftests/bpf/progs/test_btf_haskv.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h index 719ab56cdb5d..845209581440 100644 --- a/tools/testing/selftests/bpf/bpf_legacy.h +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -2,15 +2,6 @@ #ifndef __BPF_LEGACY__ #define __BPF_LEGACY__ -#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ - struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ - }; \ - struct ____btf_map_##name \ - __attribute__ ((section(".maps." 
#name), used)) \ - ____btf_map_##name = { } - /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 1fd792a92a1c..941b0100bafa 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4651,7 +4651,6 @@ struct btf_file_test { }; static struct btf_file_test file_tests[] = { - { .file = "test_btf_haskv.o", }, { .file = "test_btf_newkv.o", }, { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, }; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c deleted file mode 100644 index 07c94df13660..000000000000 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018 Facebook */ -#include -#include -#include "bpf_legacy.h" - -struct ipv_counts { - unsigned int v4; - unsigned int v6; -}; - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, -}; -#pragma GCC diagnostic pop - -BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); - -__attribute__((noinline)) -int test_long_fname_2(void) -{ - struct ipv_counts *counts; - int key = 0; - - counts = bpf_map_lookup_elem(&btf_map, &key); - if (!counts) - return 0; - - counts->v6++; - - return 0; -} - -__attribute__((noinline)) -int test_long_fname_1(void) -{ - return test_long_fname_2(); -} - -SEC("dummy_tracepoint") -int _dummy_tracepoint(void *arg) -{ - return test_long_fname_1(); -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 
762671a2e90c..251854a041b5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -9,19 +9,6 @@ struct ipv_counts { unsigned int v6; }; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -/* just to validate we can handle maps in multiple sections */ -struct bpf_map_def SEC("maps") btf_map_legacy = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(long long), - .max_entries = 4, -}; -#pragma GCC diagnostic pop - -BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); @@ -41,11 +28,6 @@ int test_long_fname_2(void) counts->v6++; - /* just verify we can reference both maps */ - counts = bpf_map_lookup_elem(&btf_map_legacy, &key); - if (!counts) - return 0; - return 0; } -- cgit v1.2.3-59-g8ed1b From 04cfbc1d89d4cc73b5b328e3bacf24d43e9aa4b7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Tue, 28 Jun 2022 12:37:44 +0200 Subject: selftests: forwarding: ethtool_extended_state: Convert to busywait Currently, this script sets up the test scenario, which is supposed to end in an inability of the system to negotiate a link. It then waits for a bit, and verifies that the system can diagnose why the link was not established. The wait time for the scenario where different link speeds are forced on the two ends of a loopback cable was set to 4 seconds, which exactly covered it. As of a recent mlxsw firmware update, this time gets longer, and this test starts failing. The time that selftests wait for links to be established is currently $WAIT_TIMEOUT, or 20 seconds. It seems reasonable that if this is the time necessary to establish and bring up a link, it should also be enough to determine that a link cannot be established and why.
Therefore in this patch, convert the sleeps to busywaits, so that if a failure is established sooner (as is expected), the test runs quicker. And use $WAIT_TIMEOUT as the time to wait. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- .../net/forwarding/ethtool_extended_state.sh | 43 ++++++++++++++-------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh index 4b42dfd4efd1..072faa77f53b 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -11,6 +11,8 @@ NUM_NETIFS=2 source lib.sh source ethtool_lib.sh +TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + setup_prepare() { swp1=${NETIFS[p1]} @@ -18,7 +20,7 @@ setup_prepare() swp3=$NETIF_NO_CABLE } -ethtool_extended_state_check() +ethtool_ext_state() { local dev=$1; shift local expected_ext_state=$1; shift @@ -30,21 +32,27 @@ ethtool_extended_state_check() | sed -e 's/^[[:space:]]*//') ext_state=$(echo $ext_state | cut -d "," -f1) - [[ $ext_state == $expected_ext_state ]] - check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\"" - - [[ $ext_substate == $expected_ext_substate ]] - check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + if [[ $ext_state != $expected_ext_state ]]; then + echo "Expected \"$expected_ext_state\", got \"$ext_state\"" + return 1 + fi + if [[ $ext_substate != $expected_ext_substate ]]; then + echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + return 1 + fi } autoneg() { + local msg + RET=0 ip link set dev $swp1 up - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" "No partner detected" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected") + check_err $? 
"$msg" log_test "Autoneg, No partner detected" @@ -53,6 +61,8 @@ autoneg() autoneg_force_mode() { + local msg + RET=0 ip link set dev $swp1 up @@ -65,12 +75,13 @@ autoneg_force_mode() ethtool_set $swp1 speed $speed1 autoneg off ethtool_set $swp2 speed $speed2 autoneg off - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" - ethtool_extended_state_check $swp2 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" log_test "Autoneg, No partner detected during force mode" @@ -83,12 +94,14 @@ autoneg_force_mode() no_cable() { + local msg + RET=0 ip link set dev $swp3 up - sleep 1 - ethtool_extended_state_check $swp3 "No cable" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") + check_err $? "$msg" log_test "No cable" -- cgit v1.2.3-59-g8ed1b From 3b34bcb946c2a8240ef6761be2ee404ceb7e1079 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:10 -0700 Subject: tools/bpf: Sync btf_ids.h to tools Has been slowly getting out of sync, let's update it. resolve_btfids usage has been updated to match the header changes. Also bring new parts of tools/include/uapi/linux/bpf.h. 
Acked-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20220628174314.1216643-8-sdf@google.com Signed-off-by: Alexei Starovoitov --- tools/include/linux/btf_ids.h | 35 +++++++++++++++++----- tools/include/uapi/linux/bpf.h | 3 ++ .../selftests/bpf/prog_tests/resolve_btfids.c | 2 +- 3 files changed, 32 insertions(+), 8 deletions(-) (limited to 'tools/testing') diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h index 57890b357f85..71e54b1e3796 100644 --- a/tools/include/linux/btf_ids.h +++ b/tools/include/linux/btf_ids.h @@ -73,7 +73,7 @@ asm( \ __BTF_ID_LIST(name, local) \ extern u32 name[]; -#define BTF_ID_LIST_GLOBAL(name) \ +#define BTF_ID_LIST_GLOBAL(name, n) \ __BTF_ID_LIST(name, globl) /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with @@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl) #define BTF_ID_LIST_SINGLE(name, prefix, typename) \ BTF_ID_LIST(name) \ BTF_ID(prefix, typename) +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \ + BTF_ID_LIST_GLOBAL(name, 1) \ + BTF_ID(prefix, typename) /* * The BTF_ID_UNUSED macro defines 4 zero bytes. 
@@ -143,13 +146,14 @@ extern struct btf_id_set name; #else -#define BTF_ID_LIST(name) static u32 name[5]; +#define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; #define BTF_ID(prefix, name) #define BTF_ID_UNUSED -#define BTF_ID_LIST_GLOBAL(name) u32 name[1]; -#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; -#define BTF_SET_START(name) static struct btf_id_set name = { 0 }; -#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; +#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; +#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; +#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; +#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; +#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; #define BTF_SET_END(name) #endif /* CONFIG_DEBUG_INFO_BTF */ @@ -172,7 +176,10 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket) enum { #define BTF_SOCK_TYPE(name, str) name, @@ -184,4 +191,18 @@ MAX_BTF_SOCK_TYPE, extern u32 btf_sock_ids[]; #endif +#define BTF_TRACING_TYPE_xxx \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file) \ + BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct) + +enum { +#define BTF_TRACING_TYPE(name, type) name, +BTF_TRACING_TYPE_xxx +#undef BTF_TRACING_TYPE +MAX_BTF_TRACING_TYPE, +}; + +extern u32 btf_tracing_ids[]; + #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b7479898c879..ad9e7311c4cf 100644 
--- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1432,6 +1432,7 @@ union bpf_attr { __u32 attach_flags; __aligned_u64 prog_ids; __u32 prog_cnt; + __aligned_u64 prog_attach_flags; /* output: per-program attach_flags */ } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ @@ -6076,6 +6077,8 @@ struct bpf_prog_info { __u64 run_cnt; __u64 recursion_misses; __u32 verified_insns; + __u32 attach_btf_obj_id; + __u32 attach_btf_id; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index f4a13d9dd5c8..c197261d02e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -44,7 +44,7 @@ BTF_ID(union, U) BTF_ID(func, func) extern __u32 test_list_global[]; -BTF_ID_LIST_GLOBAL(test_list_global) +BTF_ID_LIST_GLOBAL(test_list_global, 1) BTF_ID_UNUSED BTF_ID(typedef, S) BTF_ID(typedef, T) -- cgit v1.2.3-59-g8ed1b From dca85aac8895708b74c7f2264e3ab5048c02b8b2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 28 Jun 2022 10:43:14 -0700 Subject: selftests/bpf: lsm_cgroup functional test Functional test that exercises the following: 1. apply default sk_priority policy 2. permit TX-only AF_PACKET socket 3. cgroup attach/detach/replace 4. 
reusing trampoline shim Signed-off-by: Stanislav Fomichev Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220628174314.1216643-12-sdf@google.com Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/lsm_cgroup.c | 293 +++++++++++++++++++++ .../testing/selftests/bpf/progs/bpf_tracing_net.h | 1 + tools/testing/selftests/bpf/progs/lsm_cgroup.c | 180 +++++++++++++ 3 files changed, 474 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c create mode 100644 tools/testing/selftests/bpf/progs/lsm_cgroup.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c new file mode 100644 index 000000000000..d40810a742fa --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include "lsm_cgroup.skel.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" + +static struct btf *btf; + +static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) +{ + LIBBPF_OPTS(bpf_prog_query_opts, p); + int cnt = 0; + int i; + + ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query"); + + if (!attach_func) + return p.prog_cnt; + + /* When attach_func is provided, count the number of progs that + * attach to the given symbol. 
+ */ + + if (!btf) + btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux")) + return -1; + + p.prog_ids = malloc(sizeof(u32) * p.prog_cnt); + p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt); + ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query"); + + for (i = 0; i < p.prog_cnt; i++) { + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int fd; + + fd = bpf_prog_get_fd_by_id(p.prog_ids[i]); + ASSERT_GE(fd, 0, "prog_get_fd_by_id"); + ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd"); + close(fd); + + if (info.attach_btf_id == + btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC)) + cnt++; + } + + free(p.prog_ids); + free(p.prog_attach_flags); + + return cnt; +} + +static void test_lsm_cgroup_functional(void) +{ + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1; + int listen_fd, client_fd, accepted_fd; + struct lsm_cgroup *skel = NULL; + int post_create_prog_fd2 = -1; + int post_create_prog_fd = -1; + int bind_link_fd2 = -1; + int bind_prog_fd2 = -1; + int alloc_prog_fd = -1; + int bind_prog_fd = -1; + int bind_link_fd = -1; + int clone_prog_fd = -1; + int err, fd, prio; + socklen_t socklen; + + cgroup_fd3 = test__join_cgroup("/sock_policy_empty"); + if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup")) + goto close_cgroup; + + cgroup_fd2 = test__join_cgroup("/sock_policy_reuse"); + if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse")) + goto close_cgroup; + + cgroup_fd = test__join_cgroup("/sock_policy"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto close_cgroup; + + skel = lsm_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto close_cgroup; + + post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create); + post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2); + bind_prog_fd 
= bpf_program__fd(skel->progs.socket_bind); + bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2); + alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc); + clone_prog_fd = bpf_program__fd(skel->progs.socket_clone); + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count"); + err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach alloc_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count"); + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count"); + err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach clone_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count"); + + /* Make sure replacing works. */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count"); + err = bpf_prog_attach(post_create_prog_fd, cgroup_fd, + BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach post_create_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count"); + + attach_opts.replace_prog_fd = post_create_prog_fd; + err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd, + BPF_LSM_CGROUP, &attach_opts); + if (!ASSERT_OK(err, "prog replace post_create_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count"); + + /* Try the same attach/replace via link API. 
*/ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count"); + bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd, + BPF_LSM_CGROUP, NULL); + if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + + update_opts.old_prog_fd = bind_prog_fd; + update_opts.flags = BPF_F_REPLACE; + + err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts); + if (!ASSERT_OK(err, "link update bind_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + + /* Attach another instance of bind program to another cgroup. + * This should trigger the reuse of the trampoline shim (two + * programs attaching to the same btf_id). + */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count"); + bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2, + BPF_LSM_CGROUP, NULL); + if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); + + /* AF_UNIX is prohibited. */ + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + close(fd); + + /* AF_INET6 gets default policy (sk_priority). 
*/ + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)")) + goto detach_cgroup; + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 123, "sk_priority"); + + close(fd); + + /* TX-only AF_PACKET is allowed. */ + + ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0, + "socket(AF_PACKET, ..., ETH_P_ALL)"); + + fd = socket(AF_PACKET, SOCK_RAW, 0); + ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)"); + + /* TX-only AF_PACKET can not be rebound. */ + + struct sockaddr_ll sa = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_ALL), + }; + ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0, + "bind(ETH_P_ALL)"); + + close(fd); + + /* Trigger passive open. */ + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + ASSERT_GE(listen_fd, 0, "start_server"); + client_fd = connect_to_fd(listen_fd, 0); + ASSERT_GE(client_fd, 0, "connect_to_fd"); + accepted_fd = accept(listen_fd, NULL, NULL); + ASSERT_GE(accepted_fd, 0, "accept"); + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 234, "sk_priority"); + + /* These are replaced and never called. */ + ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create"); + ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind"); + + /* AF_INET6+SOCK_STREAM + * AF_PACKET+SOCK_RAW + * listen_fd + * client_fd + * accepted_fd + */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + + /* start_server + * bind(ETH_P_ALL) + */ + ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2"); + /* Single accept(). 
*/ + ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone"); + + /* AF_UNIX+SOCK_STREAM (failed) + * AF_INET6+SOCK_STREAM + * AF_PACKET+SOCK_RAW (failed) + * AF_PACKET+SOCK_RAW + * listen_fd + * client_fd + * accepted_fd + */ + ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc"); + + close(listen_fd); + close(client_fd); + close(accepted_fd); + + /* Make sure other cgroup doesn't trigger the programs. */ + + if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup")) + goto detach_cgroup; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)")) + goto detach_cgroup; + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 0, "sk_priority"); + + close(fd); + +detach_cgroup: + ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_create"); + close(bind_link_fd); + /* Don't close bind_link_fd2, exercise cgroup release cleanup. 
*/ + ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_alloc"); + ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_clone"); + +close_cgroup: + close(cgroup_fd); + close(cgroup_fd2); + close(cgroup_fd3); + lsm_cgroup__destroy(skel); +} + +void test_lsm_cgroup(void) +{ + if (test__start_subtest("functional")) + test_lsm_cgroup_functional(); + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 1c1289ba5fc5..98dd2c4815f0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -8,6 +8,7 @@ #define SOL_SOCKET 1 #define SO_SNDBUF 7 #define __SO_ACCEPTCON (1 << 16) +#define SO_PRIORITY 12 #define SOL_TCP 6 #define TCP_CONGESTION 13 diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c new file mode 100644 index 000000000000..89f3b1e961a8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +#ifndef AF_PACKET +#define AF_PACKET 17 +#endif + +#ifndef AF_UNIX +#define AF_UNIX 1 +#endif + +#ifndef EPERM +#define EPERM 1 +#endif + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, __u64); + __type(value, __u64); +} cgroup_storage SEC(".maps"); + +int called_socket_post_create; +int called_socket_post_create2; +int called_socket_bind; +int called_socket_bind2; +int called_socket_alloc; +int called_socket_clone; + +static __always_inline int test_local_storage(void) +{ + __u64 *val; + + val = bpf_get_local_storage(&cgroup_storage, 0); + if (!val) + return 0; + *val += 1; + + return 1; +} + +static __always_inline int real_create(struct socket *sock, int family, + int protocol) +{ + struct sock *sk; 
+ int prio = 123; + + /* Reject non-tx-only AF_PACKET. */ + if (family == AF_PACKET && protocol != 0) + return 0; /* EPERM */ + + sk = sock->sk; + if (!sk) + return 1; + + /* The rest of the sockets get default policy. */ + if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + + /* Make sure bpf_getsockopt is allowed and works. */ + prio = 0; + if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + if (prio != 123) + return 0; /* EPERM */ + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, + int type, int protocol, int kern) +{ + called_socket_post_create++; + return real_create(sock, family, protocol); +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_post_create") +int BPF_PROG(socket_post_create2, struct socket *sock, int family, + int type, int protocol, int kern) +{ + called_socket_post_create2++; + return real_create(sock, family, protocol); +} + +static __always_inline int real_bind(struct socket *sock, + struct sockaddr *address, + int addrlen) +{ + struct sockaddr_ll sa = {}; + + if (sock->sk->__sk_common.skc_family != AF_PACKET) + return 1; + + if (sock->sk->sk_kern_sock) + return 1; + + bpf_probe_read_kernel(&sa, sizeof(sa), address); + if (sa.sll_protocol) + return 0; /* EPERM */ + + /* Can access cgroup local storage. 
*/ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + called_socket_bind++; + return real_bind(sock, address, addrlen); +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_bind") +int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + called_socket_bind2++; + return real_bind(sock, address, addrlen); +} + +/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */ +SEC("lsm_cgroup/sk_alloc_security") +int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) +{ + called_socket_alloc++; + if (family == AF_UNIX) + return 0; /* EPERM */ + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_sock */ +SEC("lsm_cgroup/inet_csk_clone") +int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req) +{ + int prio = 234; + + called_socket_clone++; + + if (!newsk) + return 1; + + /* Accepted request sockets get a different priority. */ + if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + + /* Make sure bpf_getsockopt is allowed and works. */ + prio = 0; + if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + if (prio != 234) + return 0; /* EPERM */ + + /* Can access cgroup local storage. 
*/ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} -- cgit v1.2.3-59-g8ed1b From 6d304871e3ef4c339c06aa9b4ab55b6c77642884 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 29 Jun 2022 21:36:37 +0100 Subject: bpftool: Use feature list in bash completion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that bpftool is able to produce a list of known program, map, attach types, let's use as much of this as we can in the bash completion file, so that we don't have to expand the list each time a new type is added to the kernel. Also update the relevant test script to remove some checks that are no longer needed. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Daniel Müller Link: https://lore.kernel.org/bpf/20220629203637.138944-3-quentin@isovalent.com --- tools/bpf/bpftool/bash-completion/bpftool | 21 ++++----------------- .../testing/selftests/bpf/test_bpftool_synctypes.py | 20 +++----------------- 2 files changed, 7 insertions(+), 34 deletions(-) (limited to 'tools/testing') diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 9cef6516320b..ee177f83b179 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -703,15 +703,8 @@ _bpftool() return 0 ;; type) - local BPFTOOL_MAP_CREATE_TYPES='hash array \ - prog_array perf_event_array percpu_hash \ - percpu_array stack_trace cgroup_array lru_hash \ - lru_percpu_hash lpm_trie array_of_maps \ - hash_of_maps devmap devmap_hash sockmap cpumap \ - xskmap sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage queue stack sk_storage \ - struct_ops ringbuf inode_storage task_storage \ - bloom_filter' + local BPFTOOL_MAP_CREATE_TYPES="$(bpftool feature list map_types | \ + grep -v '^unspec$')" COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; @@ -1039,14 +1032,8 @@ _bpftool() return 0 ;; 
attach|detach) - local BPFTOOL_CGROUP_ATTACH_TYPES='cgroup_inet_ingress cgroup_inet_egress \ - cgroup_inet_sock_create cgroup_sock_ops cgroup_device cgroup_inet4_bind \ - cgroup_inet6_bind cgroup_inet4_post_bind cgroup_inet6_post_bind \ - cgroup_inet4_connect cgroup_inet6_connect cgroup_inet4_getpeername \ - cgroup_inet6_getpeername cgroup_inet4_getsockname cgroup_inet6_getsockname \ - cgroup_udp4_sendmsg cgroup_udp6_sendmsg cgroup_udp4_recvmsg \ - cgroup_udp6_recvmsg cgroup_sysctl cgroup_getsockopt cgroup_setsockopt \ - cgroup_inet_sock_release' + local BPFTOOL_CGROUP_ATTACH_TYPES="$(bpftool feature list attach_types | \ + grep '^cgroup_')" local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' # Check for $prev = $command first diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index e443e6542cb9..a6410bebe603 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -471,12 +471,6 @@ class BashcompExtractor(FileExtractor): def get_prog_attach_types(self): return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') - def get_map_types(self): - return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES') - - def get_cgroup_attach_types(self): - return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES') - def verify(first_set, second_set, message): """ Print all values that differ between two sets. 
@@ -516,17 +510,12 @@ def main(): man_map_types = man_map_info.get_map_types() man_map_info.close() - bashcomp_info = BashcompExtractor() - bashcomp_map_types = bashcomp_info.get_map_types() - verify(source_map_types, help_map_types, f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):') verify(source_map_types, man_map_types, f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):') verify(help_map_options, man_map_options, f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') - verify(source_map_types, bashcomp_map_types, - f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') # Attach types (names) @@ -542,8 +531,10 @@ def main(): man_prog_attach_types = man_prog_info.get_attach_types() man_prog_info.close() - bashcomp_info.reset_read() # We stopped at map types, rewind + + bashcomp_info = BashcompExtractor() bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() + bashcomp_info.close() verify(source_prog_attach_types, help_prog_attach_types, f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') @@ -568,17 +559,12 @@ def main(): man_cgroup_attach_types = man_cgroup_info.get_attach_types() man_cgroup_info.close() - bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types() - bashcomp_info.close() - verify(source_cgroup_attach_types, help_cgroup_attach_types, f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_cgroup_attach_types, man_cgroup_attach_types, f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') verify(help_cgroup_options, man_cgroup_options, f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and 
{ManCgroupExtractor.filename} (OPTIONS):') - verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, - f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') # Options for remaining commands -- cgit v1.2.3-59-g8ed1b From 24d2e5d9da608445e2e5c2c9ab5cf418f0fc4612 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 29 Jun 2022 16:34:55 +0200 Subject: selftests/xsk: Avoid bpf_link probe for existing xsk Currently bpf_link probe is done for each call of xsk_socket__create(). For cases where xsk context was previously created and current socket creation uses it, has_bpf_link will be overwritten, where it has already been initialized. Optimize this by moving the query to the xsk_create_ctx() so that when xsk_get_ctx() finds a ctx then no further bpf_link probes are needed. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220629143458.934337-2-maciej.fijalkowski@intel.com --- tools/testing/selftests/bpf/xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index eb50c3f336f8..fa13d2c44517 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -958,6 +958,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->fill = fill; ctx->comp = comp; list_add(&ctx->list, &umem->ctx_list); + ctx->has_bpf_link = xsk_probe_bpf_link(); return ctx; } @@ -1059,7 +1060,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, } } xsk->ctx = ctx; - xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); if (rx && !rx_setup_done) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, -- cgit v1.2.3-59-g8ed1b From 61333008d01e18716a7050fdc9479cc8d6e63883 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 29 Jun 2022 16:34:56 +0200 Subject: selftests/xsk: 
Introduce XDP prog load based on existing AF_XDP socket Currently, xsk_setup_xdp_prog() uses anonymous xsk_socket struct which means that during xsk_create_bpf_link() call, xsk->config.xdp_flags is always 0. This in turn means that from xdpxceiver it is impossible to use xdpgeneric attachment, so since commit 3b22523bca02 ("selftests, xsk: Fix bpf_res cleanup test") we were not testing SKB mode at all. To fix this, introduce a function, called xsk_setup_xdp_prog_xsk(), that will load XDP prog based on the existing xsk_socket, so that xsk context's refcount is correctly bumped and flags from application side are respected. Use this from xdpxceiver side so we get coverage of generic and native XDP program attach points. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220629143458.934337-3-maciej.fijalkowski@intel.com --- tools/testing/selftests/bpf/xdpxceiver.c | 2 +- tools/testing/selftests/bpf/xsk.c | 5 +++++ tools/testing/selftests/bpf/xsk.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 019c567b6b4e..c024aa91ea02 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -1130,7 +1130,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifindex) exit_with_error(errno); - ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd); + ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd); if (ret) exit_with_error(-ret); diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index fa13d2c44517..db911127720e 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -880,6 +880,11 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) return err; } +int 
xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd) +{ + return __xsk_setup_xdp_prog(xsk, xsks_map_fd); +} + static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, __u32 queue_id) { diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 915e7135337c..997723b0bfb2 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -269,6 +269,7 @@ struct xsk_umem_config { __u32 flags; }; +int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd); int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); -- cgit v1.2.3-59-g8ed1b From 6d4c767c032b165cc290c51f4e82bc54b14b1cf1 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 29 Jun 2022 16:34:57 +0200 Subject: selftests/xsk: Verify correctness of XDP prog attach point To prevent the case we had previously where for TEST_MODE_SKB, XDP prog was attached in native mode, call bpf_xdp_query() after loading prog and make sure that attach_mode is as expected. 
Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220629143458.934337-4-maciej.fijalkowski@intel.com --- tools/testing/selftests/bpf/xdpxceiver.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index c024aa91ea02..4c425a43e5b0 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -1085,6 +1085,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret, ifindex; void *bufs; u32 i; @@ -1134,6 +1135,22 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (ret) exit_with_error(-ret); + ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts); + if (ret) + exit_with_error(-ret); + + if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) { + if (opts.attach_mode != XDP_ATTACHED_SKB) { + ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n"); + exit_with_error(-EINVAL); + } + } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) { + if (opts.attach_mode != XDP_ATTACHED_DRV) { + ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n"); + exit_with_error(-EINVAL); + } + } + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); if (ret) exit_with_error(-ret); -- cgit v1.2.3-59-g8ed1b From 39e940d4abfabb08b6937a315546b24d10be67e3 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 29 Jun 2022 16:34:58 +0200 Subject: selftests/xsk: Destroy BPF resources only when ctx refcount drops to 0 Currently, xsk_socket__delete frees BPF resources regardless of ctx refcount. 
Xdpxceiver has a test to verify whether underlying BPF resources would not be wiped out after closing XSK socket that was bound to interface with other active sockets. From library's xsk part perspective it also means that the internal xsk context is shared and its refcount is bumped accordingly. After a switch to loading XDP prog based on previously opened XSK socket, mentioned xdpxceiver test fails with: not ok 16 [xdpxceiver.c:swap_xsk_resources:1334]: ERROR: 9/"Bad file descriptor which means that in swap_xsk_resources(), xsk_socket__delete() released xskmap which in turn caused a failure of xsk_socket__update_xskmap(). To fix this, when deleting socket, decrement ctx refcount before releasing BPF resources and do so only when refcount dropped to 0 which means there are no more active sockets for this ctx so BPF resources can be freed safely. Fixes: 2f6324a3937f ("libbpf: Support shared umems between queues and devices") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20220629143458.934337-5-maciej.fijalkowski@intel.com --- tools/testing/selftests/bpf/xsk.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index db911127720e..f2721a4ae7c5 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -1156,8 +1156,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_mmap_tx; } - ctx->prog_fd = -1; - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = __xsk_setup_xdp_prog(xsk, NULL); if (err) @@ -1238,7 +1236,10 @@ void xsk_socket__delete(struct xsk_socket *xsk) ctx = xsk->ctx; umem = ctx->umem; - if (ctx->prog_fd != -1) { + + xsk_put_ctx(ctx, true); + + if (!ctx->refcount) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); if (ctx->has_bpf_link) @@ -1257,8 +1258,6 @@ void 
xsk_socket__delete(struct xsk_socket *xsk) } } - xsk_put_ctx(ctx, true); - umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. -- cgit v1.2.3-59-g8ed1b From 9c154ab47f5e5ff632d2b7af6342c027d7e04b92 Mon Sep 17 00:00:00 2001 From: Alaa Mohamed Date: Thu, 30 Jun 2022 12:24:49 +0200 Subject: selftests: net: fib_rule_tests: fix support for running individual tests Parsing and usage of -t got missed in the previous patch. This patch fixes it. Fixes: 816cda9ae531 ("selftests: net: fib_rule_tests: add support to select a test to run") Signed-off-by: Alaa Mohamed Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_rule_tests.sh | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index bbe3b379927a..c245476fa29d 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -303,6 +303,29 @@ run_fibrule_tests() log_section "IPv6 fib rule" fib_rule6_test } +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" -- cgit v1.2.3-59-g8ed1b From dbdd9a28e1406ab8218a69e60f10a168b968c81d Mon Sep 17 00:00:00 2001 From: Li kunyu Date: Fri, 1 Jul 2022 17:13:45 +0800 Subject: net/cmsg_sender: Remove a semicolon Remove the repeated ';' from code. Signed-off-by: Li kunyu Signed-off-by: David S.
Miller --- tools/testing/selftests/net/cmsg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index bc2162909a1a..75dd83e39207 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -456,7 +456,7 @@ int main(int argc, char *argv[]) buf[1] = 0; } else if (opt.sock.type == SOCK_RAW) { struct udphdr hdr = { 1, 2, htons(opt.size), 0 }; - struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;; + struct sockaddr_in6 *sin6 = (void *)ai->ai_addr; memcpy(buf, &hdr, sizeof(hdr)); sin6->sin6_port = htons(opt.sock.proto); -- cgit v1.2.3-59-g8ed1b From b0d93b44641a83c28014ca38001e85bf6dc8501e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 30 Jun 2022 15:42:03 -0700 Subject: selftests/bpf: Skip lsm_cgroup when we don't have trampolines With arch_prepare_bpf_trampoline removed on x86: [...] #98/1 lsm_cgroup/functional:SKIP #98 lsm_cgroup:SKIP Summary: 1/0 PASSED, 1 SKIPPED, 0 FAILED Fixes: dca85aac8895 ("selftests/bpf: lsm_cgroup functional test") Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20220630224203.512815-1-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index d40810a742fa..c542d7e80a5b 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -9,6 +9,10 @@ #include "cgroup_helpers.h" #include "network_helpers.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + static struct btf *btf; static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) @@ -100,6 +104,10 @@ static void test_lsm_cgroup_functional(void) 
ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count"); ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count"); err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (err == -ENOTSUPP) { + test__skip(); + goto close_cgroup; + } if (!ASSERT_OK(err, "attach alloc_prog_fd")) goto detach_cgroup; ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count"); -- cgit v1.2.3-59-g8ed1b From 0d153dd208d46c2096151ac1c484e776754dfe51 Mon Sep 17 00:00:00 2001 From: Casper Andersson Date: Fri, 1 Jul 2022 16:43:50 +0200 Subject: selftest: net: bridge mdb add/del entry to port that is down Tests that permanent mdb entries can be added/deleted on ports with state down. Signed-off-by: Casper Andersson Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/Makefile | 1 + .../net/forwarding/bridge_mdb_port_down.sh | 118 +++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8f481218a492..669ffd6f2a68 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ + bridge_mdb_port_down.sh \ bridge_mld.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh new file mode 100755 index 000000000000..1a0480e71d83 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that permanent mdb entries can be added to and deleted from bridge +# interfaces that are down, and works correctly when 
done so. + +ALL_TESTS="add_del_to_port_down" +NUM_NETIFS=4 + +TEST_GROUP="239.10.10.10" +TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +source lib.sh + + +add_del_to_port_down() { + RET=0 + + ip link set dev $swp2 down + bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null + check_err $? "Failed adding mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" + + ip link set dev $swp2 down + bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null + check_err $? "MDB entry did not persist after link up/down" + + bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null + check_err $? "Failed deleting mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_err $? "Traffic to $TEST_GROUP was forwarded after entry removed" + + log_test "MDB add/del entry to port with state down " +} + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + + bridge link set dev $swp2 mcast_flood off + # Bridge currently has a "grace time" at creation time before it + # forwards multicast according to the mdb. 
Since we disable the + # mcast_flood setting per port + sleep 10 +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + h2_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +tests_run +exit $EXIT_STATUS -- cgit v1.2.3-59-g8ed1b From e95ab1d852897a0b697cd0fb609d496ce97fff3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 2 Jul 2022 08:48:18 -0700 Subject: selftests: net: af_unix: Test connect() with different netns. This patch adds a test that checks connect()ivity between two sockets: unnamed socket -> bound socket * SOCK_STREAM or SOCK_DGRAM * pathname or abstract * same or different netns Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/af_unix/Makefile | 3 +- tools/testing/selftests/net/af_unix/unix_connect.c | 149 +++++++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/af_unix/unix_connect.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index a29f79618934..1257baa79286 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -37,3 +37,4 @@ gro ioam6_parser toeplitz cmsg_sender +unix_connect \ No newline at end of file diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index df341648f818..969620ae9928 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,2 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob +TEST_GEN_PROGS := test_unix_oob unix_connect + include ../../lib.mk diff --git
a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c new file mode 100644 index 000000000000..157e44ef7f37 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> + +#include <stdio.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(unix_connect) +{ + int server, client; + int family; +}; + +FIXTURE_VARIANT(unix_connect) +{ + int type; + char sun_path[8]; + int len; + int flags; + int err; +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_SETUP(unix_connect) +{ + self->family = AF_UNIX; +} + +FIXTURE_TEARDOWN(unix_connect) +{ + close(self->server); + close(self->client); + + if
(variant->sun_path[0]) + remove("test"); +} + +#define offsetof(type, member) ((size_t)&((type *)0)->(member)) + +TEST_F(unix_connect, test) +{ + socklen_t addrlen; + struct sockaddr_un addr = { + .sun_family = self->family, + }; + int err; + + self->server = socket(self->family, variant->type, 0); + ASSERT_NE(-1, self->server); + + addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len; + memcpy(&addr.sun_path, variant->sun_path, variant->len); + + err = bind(self->server, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, err); + + if (variant->type == SOCK_STREAM) { + err = listen(self->server, 32); + ASSERT_EQ(0, err); + } + + err = unshare(variant->flags); + ASSERT_EQ(0, err); + + self->client = socket(self->family, variant->type, 0); + ASSERT_LT(0, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(variant->err, err == -1 ? errno : 0); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3-59-g8ed1b From 67d8ed4295258cb17e2bed7ed5ada92526a643f5 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Tue, 28 Jun 2022 16:01:24 +0000 Subject: selftests/bpf: Add type-match checks to type-based tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have type-match logic in both libbpf and the kernel, this change adjusts the existing BPF self tests to check this functionality. Specifically, we extend the existing type-based tests to check the previously introduced bpf_core_type_matches macro. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220628160127.607834-8-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 31 +++++++++++++++++++-- .../testing/selftests/bpf/progs/core_reloc_types.h | 14 +++++++++- .../bpf/progs/test_core_reloc_type_based.c | 32 +++++++++++++++++++++- 3 files changed, 73 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 2f92feb809be..328dd744e5bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -543,7 +543,6 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test) return 0; } - static const struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -752,7 +751,7 @@ static const struct core_reloc_test_case test_cases[] = { SIZE_CASE(size___diff_offs), SIZE_ERR_CASE(size___err_ambiguous), - /* validate type existence and size relocations */ + /* validate type existence, match, and size relocations */ TYPE_BASED_CASE(type_based, { .struct_exists = 1, .union_exists = 1, @@ -765,6 +764,19 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_void_ptr_exists = 1, .typedef_func_proto_exists = 1, .typedef_arr_exists = 1, + + .struct_matches = 1, + .union_matches = 1, + .enum_matches = 1, + .typedef_named_struct_matches = 1, + .typedef_anon_struct_matches = 1, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 1, + .typedef_enum_matches = 1, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 1, + .typedef_arr_matches = 1, + .struct_sz = sizeof(struct a_struct), .union_sz = sizeof(union a_union), .enum_sz = sizeof(enum an_enum), @@ -792,6 +804,19 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_void_ptr_exists = 1, 
.typedef_func_proto_exists = 1, .typedef_arr_exists = 1, + + .struct_matches = 0, + .union_matches = 0, + .enum_matches = 0, + .typedef_named_struct_matches = 0, + .typedef_anon_struct_matches = 0, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 0, + .typedef_enum_matches = 0, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 0, + .typedef_arr_matches = 0, + .struct_sz = sizeof(struct a_struct___diff_sz), .union_sz = sizeof(union a_union___diff_sz), .enum_sz = sizeof(enum an_enum___diff_sz), @@ -806,10 +831,12 @@ static const struct core_reloc_test_case test_cases[] = { }), TYPE_BASED_CASE(type_based___incompat, { .enum_exists = 1, + .enum_matches = 1, .enum_sz = sizeof(enum an_enum), }), TYPE_BASED_CASE(type_based___fn_wrong_args, { .struct_exists = 1, + .struct_matches = 1, .struct_sz = sizeof(struct a_struct), }), diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 26e103302c05..6a44f3e63ae5 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -860,7 +860,7 @@ struct core_reloc_size___err_ambiguous2 { }; /* - * TYPE EXISTENCE & SIZE + * TYPE EXISTENCE, MATCH & SIZE */ struct core_reloc_type_based_output { bool struct_exists; @@ -875,6 +875,18 @@ struct core_reloc_type_based_output { bool typedef_func_proto_exists; bool typedef_arr_exists; + bool struct_matches; + bool union_matches; + bool enum_matches; + bool typedef_named_struct_matches; + bool typedef_anon_struct_matches; + bool typedef_struct_ptr_matches; + bool typedef_int_matches; + bool typedef_enum_matches; + bool typedef_void_ptr_matches; + bool typedef_func_proto_matches; + bool typedef_arr_matches; + int struct_sz; int union_sz; int enum_sz; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index fb60f8195c53..325ead666130 100644 --- 
a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -61,6 +61,18 @@ struct core_reloc_type_based_output { bool typedef_func_proto_exists; bool typedef_arr_exists; + bool struct_matches; + bool union_matches; + bool enum_matches; + bool typedef_named_struct_matches; + bool typedef_anon_struct_matches; + bool typedef_struct_ptr_matches; + bool typedef_int_matches; + bool typedef_enum_matches; + bool typedef_void_ptr_matches; + bool typedef_func_proto_matches; + bool typedef_arr_matches; + int struct_sz; int union_sz; int enum_sz; @@ -77,7 +89,13 @@ struct core_reloc_type_based_output { SEC("raw_tracepoint/sys_enter") int test_core_type_based(void *ctx) { -#if __has_builtin(__builtin_preserve_type_info) + /* Support for the BPF_TYPE_MATCHES argument to the + * __builtin_preserve_type_info builtin was added at some point during + * development of clang 15 and it's what we require for this test. Part of it + * could run with merely __builtin_preserve_type_info (which could be checked + * separately), but we have to find an upper bound. 
+ */ +#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15 struct core_reloc_type_based_output *out = (void *)&data.out; out->struct_exists = bpf_core_type_exists(struct a_struct); @@ -92,6 +110,18 @@ int test_core_type_based(void *ctx) out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + out->struct_matches = bpf_core_type_matches(struct a_struct); + out->union_matches = bpf_core_type_matches(union a_union); + out->enum_matches = bpf_core_type_matches(enum an_enum); + out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef); + out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef); + out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef); + out->typedef_int_matches = bpf_core_type_matches(int_typedef); + out->typedef_enum_matches = bpf_core_type_matches(enum_typedef); + out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef); + out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef); + out->typedef_arr_matches = bpf_core_type_matches(arr_typedef); + out->struct_sz = bpf_core_type_size(struct a_struct); out->union_sz = bpf_core_type_size(union a_union); out->enum_sz = bpf_core_type_size(enum an_enum); -- cgit v1.2.3-59-g8ed1b From bed56a6dd4cbf76ff757679a9333f9d3a72ed7ec Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Tue, 28 Jun 2022 16:01:25 +0000 Subject: selftests/bpf: Add test checking more characteristics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds another type-based self-test that specifically aims to test some more characteristics of the TYPE_MATCH logic. Specifically, it covers a few more potential differences between types, such as different orders, enum variant values, and integer signedness. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220628160127.607834-9-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 37 ++++++++++++++++ .../bpf/progs/btf__core_reloc_type_based___diff.c | 3 ++ .../testing/selftests/bpf/progs/core_reloc_types.h | 51 ++++++++++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 328dd744e5bd..eb47bfde459c 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -792,6 +792,43 @@ static const struct core_reloc_test_case test_cases[] = { TYPE_BASED_CASE(type_based___all_missing, { /* all zeros */ }), + TYPE_BASED_CASE(type_based___diff, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + + .struct_matches = 1, + .union_matches = 1, + .enum_matches = 1, + .typedef_named_struct_matches = 1, + .typedef_anon_struct_matches = 1, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 0, + .typedef_enum_matches = 1, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 0, + .typedef_arr_matches = 0, + + .struct_sz = sizeof(struct a_struct___diff), + .union_sz = sizeof(union a_union___diff), + .enum_sz = sizeof(enum an_enum___diff), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff), + .typedef_int_sz = sizeof(int_typedef___diff), + .typedef_enum_sz = 
sizeof(enum_typedef___diff), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff), + .typedef_arr_sz = sizeof(arr_typedef___diff), + }), TYPE_BASED_CASE(type_based___diff_sz, { .struct_exists = 1, .union_exists = 1, diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c new file mode 100644 index 000000000000..57ae2c258928 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 6a44f3e63ae5..e326b6a781e5 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -951,6 +951,57 @@ struct core_reloc_type_based { struct core_reloc_type_based___all_missing { }; +/* different member orders, enum variant values, signedness, etc */ +struct a_struct___diff { + int x; + int a; +}; + +union a_union___diff { + int z; + int y; +}; + +typedef struct a_struct___diff named_struct_typedef___diff; + +typedef struct { int z, x, y; } anon_struct_typedef___diff; + +typedef struct { + int c; + int b; + int a; +} *struct_ptr_typedef___diff; + +enum an_enum___diff { + AN_ENUM_VAL2___diff = 0, + AN_ENUM_VAL1___diff = 42, + AN_ENUM_VAL3___diff = 1, +}; + +typedef unsigned int int_typedef___diff; + +typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff; + +typedef const void *void_ptr_typedef___diff; + +typedef int_typedef___diff (*func_proto_typedef___diff)(long); + +typedef char arr_typedef___diff[3]; + +struct core_reloc_type_based___diff { + struct a_struct___diff f1; + union a_union___diff f2; + enum an_enum___diff f3; + named_struct_typedef___diff f4; + 
anon_struct_typedef___diff f5; + struct_ptr_typedef___diff f6; + int_typedef___diff f7; + enum_typedef___diff f8; + void_ptr_typedef___diff f9; + func_proto_typedef___diff f10; + arr_typedef___diff f11; +}; + /* different type sizes, extra modifiers, anon vs named enums, etc */ struct a_struct___diff_sz { long x; -- cgit v1.2.3-59-g8ed1b From 537905c4b68fe3a32c4925122fe792eb2f594b73 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Tue, 28 Jun 2022 16:01:26 +0000 Subject: selftests/bpf: Add nested type to type based tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change extends the type based tests with another struct type (in addition to a_struct) to check relocations against: a_complex_struct. This type is nested more deeply to provide additional coverage of certain paths in the type match logic. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220628160127.607834-10-deso@posteo.net --- .../testing/selftests/bpf/prog_tests/core_reloc.c | 4 ++ .../testing/selftests/bpf/progs/core_reloc_types.h | 62 +++++++++++++++------- .../bpf/progs/test_core_reloc_type_based.c | 12 +++++ 3 files changed, 58 insertions(+), 20 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index eb47bfde459c..8882c9c1519b 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -754,6 +754,7 @@ static const struct core_reloc_test_case test_cases[] = { /* validate type existence, match, and size relocations */ TYPE_BASED_CASE(type_based, { .struct_exists = 1, + .complex_struct_exists = 1, .union_exists = 1, .enum_exists = 1, .typedef_named_struct_exists = 1, @@ -766,6 +767,7 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_arr_exists = 1, .struct_matches = 1, + .complex_struct_matches = 1, 
.union_matches = 1, .enum_matches = 1, .typedef_named_struct_matches = 1, @@ -794,6 +796,7 @@ static const struct core_reloc_test_case test_cases[] = { }), TYPE_BASED_CASE(type_based___diff, { .struct_exists = 1, + .complex_struct_exists = 1, .union_exists = 1, .enum_exists = 1, .typedef_named_struct_exists = 1, @@ -806,6 +809,7 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_arr_exists = 1, .struct_matches = 1, + .complex_struct_matches = 1, .union_matches = 1, .enum_matches = 1, .typedef_named_struct_matches = 1, diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index e326b6a781e5..474411c4b908 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -864,6 +864,7 @@ struct core_reloc_size___err_ambiguous2 { */ struct core_reloc_type_based_output { bool struct_exists; + bool complex_struct_exists; bool union_exists; bool enum_exists; bool typedef_named_struct_exists; @@ -876,6 +877,7 @@ struct core_reloc_type_based_output { bool typedef_arr_exists; bool struct_matches; + bool complex_struct_matches; bool union_matches; bool enum_matches; bool typedef_named_struct_matches; @@ -904,6 +906,14 @@ struct a_struct { int x; }; +struct a_complex_struct { + union { + struct a_struct * restrict a; + void *b; + } x; + volatile long y; +}; + union a_union { int y; int z; @@ -935,16 +945,17 @@ typedef char arr_typedef[20]; struct core_reloc_type_based { struct a_struct f1; - union a_union f2; - enum an_enum f3; - named_struct_typedef f4; - anon_struct_typedef f5; - struct_ptr_typedef f6; - int_typedef f7; - enum_typedef f8; - void_ptr_typedef f9; - func_proto_typedef f10; - arr_typedef f11; + struct a_complex_struct f2; + union a_union f3; + enum an_enum f4; + named_struct_typedef f5; + anon_struct_typedef f6; + struct_ptr_typedef f7; + int_typedef f8; + enum_typedef f9; + void_ptr_typedef f10; + func_proto_typedef 
f11; + arr_typedef f12; }; /* no types in target */ @@ -957,6 +968,16 @@ struct a_struct___diff { int a; }; +struct a_struct___forward; + +struct a_complex_struct___diff { + union { + struct a_struct___forward *a; + void *b; + } x; + volatile long y; +}; + union a_union___diff { int z; int y; @@ -990,16 +1011,17 @@ typedef char arr_typedef___diff[3]; struct core_reloc_type_based___diff { struct a_struct___diff f1; - union a_union___diff f2; - enum an_enum___diff f3; - named_struct_typedef___diff f4; - anon_struct_typedef___diff f5; - struct_ptr_typedef___diff f6; - int_typedef___diff f7; - enum_typedef___diff f8; - void_ptr_typedef___diff f9; - func_proto_typedef___diff f10; - arr_typedef___diff f11; + struct a_complex_struct___diff f2; + union a_union___diff f3; + enum an_enum___diff f4; + named_struct_typedef___diff f5; + anon_struct_typedef___diff f6; + struct_ptr_typedef___diff f7; + int_typedef___diff f8; + enum_typedef___diff f9; + void_ptr_typedef___diff f10; + func_proto_typedef___diff f11; + arr_typedef___diff f12; }; /* different type sizes, extra modifiers, anon vs named enums, etc */ diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index 325ead666130..d95bc08b75c1 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -19,6 +19,14 @@ struct a_struct { int x; }; +struct a_complex_struct { + union { + struct a_struct *a; + void *b; + } x; + volatile long y; +}; + union a_union { int y; int z; @@ -50,6 +58,7 @@ typedef char arr_typedef[20]; struct core_reloc_type_based_output { bool struct_exists; + bool complex_struct_exists; bool union_exists; bool enum_exists; bool typedef_named_struct_exists; @@ -62,6 +71,7 @@ struct core_reloc_type_based_output { bool typedef_arr_exists; bool struct_matches; + bool complex_struct_matches; bool union_matches; bool enum_matches; bool 
typedef_named_struct_matches; @@ -99,6 +109,7 @@ int test_core_type_based(void *ctx) struct core_reloc_type_based_output *out = (void *)&data.out; out->struct_exists = bpf_core_type_exists(struct a_struct); + out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct); out->union_exists = bpf_core_type_exists(union a_union); out->enum_exists = bpf_core_type_exists(enum an_enum); out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); @@ -111,6 +122,7 @@ int test_core_type_based(void *ctx) out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); out->struct_matches = bpf_core_type_matches(struct a_struct); + out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct); out->union_matches = bpf_core_type_matches(union a_union); out->enum_matches = bpf_core_type_matches(enum an_enum); out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef); -- cgit v1.2.3-59-g8ed1b From 950b347787224e62f59c099e3e3f3f6ecc720d61 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Tue, 28 Jun 2022 16:01:27 +0000 Subject: selftests/bpf: Add type match test against kernel's task_struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change extends the existing core_reloc/kernel test to include a type match check of a local task_struct against the kernel's definition -- which we assume to succeed. 
Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220628160127.607834-11-deso@posteo.net --- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 1 + tools/testing/selftests/bpf/progs/core_reloc_types.h | 1 + .../selftests/bpf/progs/test_core_reloc_kernel.c | 19 +++++++++++++++++++ 3 files changed, 21 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 8882c9c1519b..a6f65e2236f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -555,6 +555,7 @@ static const struct core_reloc_test_case test_cases[] = { .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, .comm = "test_progs", .comm_len = sizeof("test_progs"), + .local_task_struct_matches = true, }, .output_len = sizeof(struct core_reloc_kernel_output), .raw_tp_name = "sys_enter", diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 474411c4b908..7ef91d19c66e 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -13,6 +13,7 @@ struct core_reloc_kernel_output { int valid[10]; char comm[sizeof("test_progs")]; int comm_len; + bool local_task_struct_matches; }; /* diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index 145028b52ad8..a17dd83eae67 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -21,6 +21,7 @@ struct core_reloc_kernel_output { /* we have test_progs[-flavor], so cut flavor part */ char comm[sizeof("test_progs")]; int comm_len; + bool local_task_struct_matches; }; struct task_struct { @@ -30,11 +31,25 @@ struct task_struct { struct task_struct *group_leader; }; 
+struct mm_struct___wrong { + int abc_whatever_should_not_exist; +}; + +struct task_struct___local { + int pid; + struct mm_struct___wrong *mm; +}; + #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) SEC("raw_tracepoint/sys_enter") int test_core_kernel(void *ctx) { + /* Support for the BPF_TYPE_MATCHES argument to the + * __builtin_preserve_type_info builtin was added at some point during + * development of clang 15 and it's what we require for this test. + */ +#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15 struct task_struct *task = (void *)bpf_get_current_task(); struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); @@ -93,6 +108,10 @@ int test_core_kernel(void *ctx) group_leader, group_leader, group_leader, group_leader, comm); + out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local); +#else + data.skip = true; +#endif return 0; } -- cgit v1.2.3-59-g8ed1b From f36068a20256bad993d60e49602f02e3af336506 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Jul 2022 16:59:25 -0700 Subject: selftests: tls: add selftest variant for pad Add a self-test variant with TLS 1.3 nopad set. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tls.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5d70b04c482c..e71ec5846be9 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -235,6 +235,7 @@ FIXTURE_VARIANT(tls) { uint16_t tls_version; uint16_t cipher_type; + bool nopad; }; FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) @@ -297,9 +298,17 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) .cipher_type = TLS_CIPHER_AES_GCM_256, }; +FIXTURE_VARIANT_ADD(tls, 13_nopad) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + .nopad = true, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; + int one = 1; int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, @@ -315,6 +324,12 @@ FIXTURE_SETUP(tls) ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); + + if (variant->nopad) { + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&one, sizeof(one)); + ASSERT_EQ(ret, 0); + } } FIXTURE_TEARDOWN(tls) -- cgit v1.2.3-59-g8ed1b From 645d5d3bc001a7d77459cb8360c36a60954d6008 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Jul 2022 15:48:16 -0700 Subject: selftests/bpf: Fix bogus uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling selftests/bpf in optimized mode (-O2), GCC erroneously complains about uninitialized token variable: In file included from network_helpers.c:22: network_helpers.c: In function ‘open_netns’: test_progs.h:355:22: error: ‘token’ may be used uninitialized [-Werror=maybe-uninitialized] 355 | int ___err = libbpf_get_error(___res); \ | ^~~~~~~~~~~~~~~~~~~~~~~~ network_helpers.c:440:14: note: in expansion of macro ‘ASSERT_OK_PTR’ 440 | if (!ASSERT_OK_PTR(token, "malloc token")) | ^~~~~~~~~~~~~ In file included from 
/data/users/andriin/linux/tools/testing/selftests/bpf/tools/include/bpf/libbpf.h:21, from bpf_util.h:9, from network_helpers.c:20: /data/users/andriin/linux/tools/testing/selftests/bpf/tools/include/bpf/libbpf_legacy.h:113:17: note: by argument 1 of type ‘const void *’ to ‘libbpf_get_error’ declared here 113 | LIBBPF_API long libbpf_get_error(const void *ptr); | ^~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors make: *** [Makefile:522: /data/users/andriin/linux/tools/testing/selftests/bpf/network_helpers.o] Error 1 This is completely bogus because libbpf_get_error() doesn't dereference the pointer, but the only easy way to silence this is to allocate initialized memory with calloc(). Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220705224818.4026623-2-andrii@kernel.org --- tools/testing/selftests/bpf/network_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 59cf81ec55af..bec15558fd93 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -436,7 +436,7 @@ struct nstoken *open_netns(const char *name) int err; struct nstoken *token; - token = malloc(sizeof(struct nstoken)); + token = calloc(1, sizeof(struct nstoken)); if (!ASSERT_OK_PTR(token, "malloc token")) return NULL; -- cgit v1.2.3-59-g8ed1b From c46a12200114ae5ad7b0922e6c8969a22c3afbd7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 5 Jul 2022 15:48:17 -0700 Subject: selftests/bpf: Fix few more compiler warnings When compiling with -O2, GCC detects a few problems with selftests/bpf, so fix all of them.
Two are real issues (uninitialized err and nums out-of-bounds access), but two other uninitialized variables warnings are due to GCC not being able to prove that variables are indeed initialized under conditions under which they are used. Fix all 4 cases, though. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220705224818.4026623-3-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 4 ++-- tools/testing/selftests/bpf/prog_tests/usdt.c | 2 +- tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 586dc52d6fb9..a8cb8a96ddaf 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -329,7 +329,7 @@ static int get_syms(char ***symsp, size_t *cntp) struct hashmap *map; char buf[256]; FILE *f; - int err; + int err = 0; /* * The available_filter_functions contains many duplicates, @@ -404,7 +404,7 @@ static void test_bench_attach(void) double attach_delta, detach_delta; struct bpf_link *link = NULL; char **syms = NULL; - size_t cnt, i; + size_t cnt = 0, i; if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 5f733d50b0d7..9ad9da0f215e 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -12,7 +12,7 @@ int lets_test_this(int); static volatile int idx = 2; static volatile __u64 bla = 0xFEDCBA9876543210ULL; -static volatile short nums[] = {-1, -2, -3, }; +static volatile short nums[] = {-1, -2, -3, -4}; static volatile struct { int x; diff --git 
a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index fb77a123fe89..874a846e298c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -63,7 +63,7 @@ static bool expect_str(char *buf, size_t size, const char *str, const char *name static void test_synproxy(bool xdp) { int server_fd = -1, client_fd = -1, accept_fd = -1; - char *prog_id, *prog_id_end; + char *prog_id = NULL, *prog_id_end; struct nstoken *ns = NULL; FILE *ctrl_file = NULL; char buf[CMD_OUT_BUF_SIZE]; -- cgit v1.2.3-59-g8ed1b From 2b4b2621fd6401865b31b9f403e4b936b7439e94 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 5 Jul 2022 12:00:18 -0700 Subject: selftests/bpf: Add benchmark for local_storage RCU Tasks Trace usage This benchmark measures grace period latency and kthread cpu usage of RCU Tasks Trace when many processes are creating/deleting BPF local_storage. Intent here is to quantify improvement on these metrics after Paul's recent RCU Tasks patches [0]. Specifically, fork 15k tasks which call a bpf prog that creates/destroys task local_storage and sleep in a loop, resulting in many call_rcu_tasks_trace calls. To determine grace period latency, trace time elapsed between rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp; for cpu usage look at rcu_task_trace_kthread's stime in /proc/PID/stat. On my virtualized test environment (Skylake, 8 cpus) benchmark results demonstrate significant improvement: BEFORE Paul's patches: SUMMARY tasks_trace grace period latency avg 22298.551 us stddev 1302.165 us SUMMARY ticks per tasks_trace grace period avg 2.291 stddev 0.324 AFTER Paul's patches: SUMMARY tasks_trace grace period latency avg 16969.197 us stddev 2525.053 us SUMMARY ticks per tasks_trace grace period avg 1.146 stddev 0.178 Note that since these patches are not in bpf-next benchmarking was done by cherry-picking this patch onto rcu tree. 
[0] https://lore.kernel.org/rcu/20220620225402.GA3842369@paulmck-ThinkPad-P17-Gen-1/ Signed-off-by: Dave Marchevsky Signed-off-by: Daniel Borkmann Acked-by: Paul E. McKenney Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220705190018.3239050-1-davemarchevsky@fb.com --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/bench.c | 42 +++ tools/testing/selftests/bpf/bench.h | 12 + .../benchs/bench_local_storage_rcu_tasks_trace.c | 281 +++++++++++++++++++++ .../run_bench_local_storage_rcu_tasks_trace.sh | 11 + .../progs/local_storage_rcu_tasks_trace_bench.c | 67 +++++ 6 files changed, 416 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh create mode 100644 tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e32a28fe8bc1..dfaac97222af 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -574,6 +574,7 @@ $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h +$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -587,7 +588,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_bpf_loop.o \ $(OUTPUT)/bench_strncmp.o \ $(OUTPUT)/bench_bpf_hashmap_full_update.o \ - $(OUTPUT)/bench_local_storage.o + $(OUTPUT)/bench_local_storage.o \ + $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o $(call 
msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1e7b5d4b1f11..c1f20a147462 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } +void +grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat) +{ + int i; + + memset(gp_stat, 0, sizeof(struct basic_stats)); + + for (i = 0; i < res_cnt; i++) + gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt); + +#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean) + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0); + } + gp_stat->stddev = sqrt(gp_stat->stddev); +#undef IT_MEAN_DIFF +} + +void +grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat) +{ + int i; + + memset(gp_stat, 0, sizeof(struct basic_stats)); + for (i = 0; i < res_cnt; i++) + gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt); + +#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean) + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0); + } + gp_stat->stddev = sqrt(gp_stat->stddev); +#undef IT_MEAN_DIFF +} + void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; @@ -236,6 +273,7 @@ extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; extern struct argp bench_bpf_loop_argp; extern struct argp bench_local_storage_argp; +extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = 
{ @@ -244,6 +282,8 @@ static const struct argp_child bench_parsers[] = { { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, { &bench_local_storage_argp, 0, "local_storage benchmark", 0 }, { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, + { &bench_local_storage_rcu_tasks_trace_argp, 0, + "local_storage RCU Tasks Trace slowdown benchmark", 0 }, {}, }; @@ -449,6 +489,7 @@ extern const struct bench bench_bpf_hashmap_full_update; extern const struct bench bench_local_storage_cache_seq_get; extern const struct bench bench_local_storage_cache_interleaved_get; extern const struct bench bench_local_storage_cache_hashmap_control; +extern const struct bench bench_local_storage_tasks_trace; static const struct bench *benchs[] = { &bench_count_global, @@ -487,6 +528,7 @@ static const struct bench *benchs[] = { &bench_local_storage_cache_seq_get, &bench_local_storage_cache_interleaved_get, &bench_local_storage_cache_hashmap_control, + &bench_local_storage_tasks_trace, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 4b15286753ba..d748255877e2 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -30,11 +30,19 @@ struct env { struct cpu_set cons_cpus; }; +struct basic_stats { + double mean; + double stddev; +}; + struct bench_res { long hits; long drops; long false_hits; long important_hits; + unsigned long gp_ns; + unsigned long gp_ct; + unsigned int stime; }; struct bench { @@ -65,6 +73,10 @@ void ops_report_final(struct bench_res res[], int res_cnt); void local_storage_report_progress(int iter, struct bench_res *res, long delta_ns); void local_storage_report_final(struct bench_res res[], int res_cnt); +void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, + struct basic_stats *gp_stat); +void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, + struct basic_stats *gp_stat); static inline __u64 
get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c new file mode 100644 index 000000000000..43f109d93130 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include + +#include +#include "local_storage_rcu_tasks_trace_bench.skel.h" +#include "bench.h" + +#include + +static struct { + __u32 nr_procs; + __u32 kthread_pid; + bool quiet; +} args = { + .nr_procs = 1000, + .kthread_pid = 0, + .quiet = false, +}; + +enum { + ARG_NR_PROCS = 7000, + ARG_KTHREAD_PID = 7001, + ARG_QUIET = 7002, +}; + +static const struct argp_option opts[] = { + { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0, + "Set number of user processes to spin up"}, + { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, + "Pid of rcu_tasks_trace kthread for ticks tracking"}, + { "quiet", ARG_QUIET, "{0,1}", 0, + "If true, don't report progress"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_PROCS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_procs\n"); + argp_usage(state); + } + args.nr_procs = ret; + break; + case ARG_KTHREAD_PID: + ret = strtol(arg, NULL, 10); + if (ret < 1) { + fprintf(stderr, "invalid kthread_pid\n"); + argp_usage(state); + } + args.kthread_pid = ret; + break; + case ARG_QUIET: + ret = strtol(arg, NULL, 10); + if (ret < 0 || ret > 1) { + fprintf(stderr, "invalid quiet %ld\n", ret); + argp_usage(state); + } + args.quiet = ret; + break; +break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_rcu_tasks_trace_argp = { + .options = opts, + .parser = parse_arg, +}; + +#define MAX_SLEEP_PROCS 150000 + +static void validate(void) 
+{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_procs > MAX_SLEEP_PROCS) { + fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", + MAX_SLEEP_PROCS); + exit(1); + } +} + +static long kthread_pid_ticks(void) +{ + char procfs_path[100]; + long stime; + FILE *f; + + if (!args.kthread_pid) + return -1; + + sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid); + f = fopen(procfs_path, "r"); + if (!f) { + fprintf(stderr, "couldn't open %s, exiting\n", procfs_path); + goto err_out; + } + if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) { + fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path); + goto err_out; + } + fclose(f); + return stime; + +err_out: + if (f) + fclose(f); + exit(1); + return 0; +} + +static struct { + struct local_storage_rcu_tasks_trace_bench *skel; + long prev_kthread_stime; +} ctx; + +static void sleep_and_loop(void) +{ + while (true) { + sleep(rand() % 4); + syscall(__NR_getpgid); + } +} + +static void local_storage_tasks_trace_setup(void) +{ + int i, err, forkret, runner_pid; + + runner_pid = getpid(); + + for (i = 0; i < args.nr_procs; i++) { + forkret = fork(); + if (forkret < 0) { + fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i, + args.nr_procs); + goto err_out; + } + + if (!forkret) { + err = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (err < 0) { + fprintf(stderr, "prctl failed with err %d, exiting\n", errno); + goto err_out; + } + + if (getppid() != runner_pid) { + fprintf(stderr, "Runner died while spinning up procs, exiting\n"); + goto err_out; + } + sleep_and_loop(); + } + } + printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid); + + setup_libbpf(); + + ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load(); + if (!ctx.skel) { + 
fprintf(stderr, "Error doing open_and_load, exiting\n"); + goto err_out; + } + + ctx.prev_kthread_stime = kthread_pid_ticks(); + + if (!bpf_program__attach(ctx.skel->progs.get_local)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.pregp_step)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.postgp)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void measure(struct bench_res *res) +{ + long ticks; + + res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0); + res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0); + ticks = kthread_pid_ticks(); + res->stime = ticks - ctx.prev_kthread_stime; + ctx.prev_kthread_stime = ticks; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + syscall(__NR_getpgid); + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + if (ctx.skel->bss->unexpected) { + fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp)."); + fprintf(stderr, "Data can't be trusted, exiting\n"); + exit(1); + } + + if (args.quiet) + return; + + printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", + iter, res->gp_ns / (double)res->gp_ct); + printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n", + iter, res->stime / (double)res->gp_ct); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + struct basic_stats gp_stat; + + grace_period_latency_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY tasks_trace grace period latency"); + printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev); + grace_period_ticks_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY ticks per tasks_trace grace period"); + printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, 
gp_stat.stddev); +} + +/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use + * of RCU Tasks-Trace. + * + * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside + * from sleep() loop, and creating/destroying BPF task-local storage on wakeup. + * The number of forked tasks is configurable. + * + * exercising code paths which call call_rcu_tasks_trace while there are many + * thousands of tasks on the system should result in RCU Tasks-Trace having to + * do a noticeable amount of work. + * + * This should be observable by measuring rcu_tasks_trace_kthread CPU usage + * after the grace period has ended, or by measuring grace period latency. + * + * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step + * and rcu_tasks_trace_postgp functions to measure grace period latency and + * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks + */ +const struct bench bench_local_storage_tasks_trace = { + .name = "local-storage-tasks-trace", + .validate = validate, + .setup = local_storage_tasks_trace_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh new file mode 100755 index 000000000000..5dac1f02892c --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +kthread_pid=`pgrep rcu_tasks_trace_kthread` + +if [ -z $kthread_pid ]; then + echo "error: Couldn't find rcu_tasks_trace_kthread" + exit 1 +fi + +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c 
b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c new file mode 100644 index 000000000000..03bf69f49075 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +long hits; +long gp_hits; +long gp_times; +long current_gp_start; +long unexpected; +bool postgp_seen; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct task_struct *task; + int idx; + int *s; + + idx = 0; + task = bpf_get_current_task_btf(); + s = bpf_task_storage_get(&task_storage, task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!s) + return 0; + + *s = 3; + bpf_task_storage_delete(&task_storage, task); + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("fentry/rcu_tasks_trace_pregp_step") +int pregp_step(struct pt_regs *ctx) +{ + current_gp_start = bpf_ktime_get_ns(); + return 0; +} + +SEC("fentry/rcu_tasks_trace_postgp") +int postgp(struct pt_regs *ctx) +{ + if (!current_gp_start && postgp_seen) { + /* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't + * execute before this prog + */ + __sync_add_and_fetch(&unexpected, 1); + return 0; + } + + __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start); + __sync_add_and_fetch(&gp_hits, 1); + current_gp_start = 0; + postgp_seen = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From 018a8e75b49cb846ebfa48076bc4fe0bb67c9c24 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 7 Jul 2022 13:16:12 +0200 Subject: selftests, xsk: Rename AF_XDP testing app Recently, xsk part of libbpf was moved to selftests/bpf directory and lives on its own because 
there is an AF_XDP testing application that needs it called xdpxceiver. That name makes it a bit hard to indicate who maintains it as there are other XDP samples in there, whereas this one is strictly about AF_XDP. Do s/xdpxceiver/xskxceiver so that it will be easier to figure out who maintains it. A follow-up patch will correct MAINTAINERS file. Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220707111613.49031-2-maciej.fijalkowski@intel.com --- tools/testing/selftests/bpf/.gitignore | 2 +- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/test_xsk.sh | 6 +- tools/testing/selftests/bpf/xdpxceiver.c | 1682 ---------------------------- tools/testing/selftests/bpf/xdpxceiver.h | 172 --- tools/testing/selftests/bpf/xsk_prereqs.sh | 4 +- tools/testing/selftests/bpf/xskxceiver.c | 1682 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/xskxceiver.h | 172 +++ 8 files changed, 1862 insertions(+), 1862 deletions(-) delete mode 100644 tools/testing/selftests/bpf/xdpxceiver.c delete mode 100644 tools/testing/selftests/bpf/xdpxceiver.h create mode 100644 tools/testing/selftests/bpf/xskxceiver.c create mode 100644 tools/testing/selftests/bpf/xskxceiver.h (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index ca2f47f45670..3a8cb2404ea6 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -41,6 +41,6 @@ test_cpp /bench *.ko *.tmp -xdpxceiver +xskxceiver xdp_redirect_multi xdp_synproxy diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index dfaac97222af..8d59ec7f4c2d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector 
test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver xdp_redirect_multi xdp_synproxy + xskxceiver xdp_redirect_multi xdp_synproxy TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read @@ -231,7 +231,7 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(OUTPUT)/xsk.o: $(BPFOBJ) -$(OUTPUT)/xdpxceiver: $(OUTPUT)/xsk.o +$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 567500299231..096a957594cd 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -47,7 +47,7 @@ # conflict with any existing interface # * tests the veth and xsk layers of the topology # -# See the source xdpxceiver.c for information on each test +# See the source xskxceiver.c for information on each test # # Kernel configuration: # --------------------- @@ -160,14 +160,14 @@ statusList=() TEST_NAME="XSK_SELFTESTS_SOFTIRQ" -execxdpxceiver +exec_xskxceiver cleanup_exit ${VETH0} ${VETH1} ${NS1} TEST_NAME="XSK_SELFTESTS_BUSY_POLL" busy_poll=1 setup_vethPairs -execxdpxceiver +exec_xskxceiver ## END TESTS diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c deleted file mode 100644 index 4c425a43e5b0..000000000000 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ /dev/null @@ -1,1682 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2020 Intel Corporation. */ - -/* - * Some functions in this program are taken from - * Linux kernel samples/bpf/xdpsock* and modified - * for use. - * - * See test_xsk.sh for detailed information on test topology - * and prerequisite network setup. 
- * - * This test program contains two threads, each thread is single socket with - * a unique UMEM. It validates in-order packet delivery and packet content - * by sending packets to each other. - * - * Tests Information: - * ------------------ - * These selftests test AF_XDP SKB and Native/DRV modes using veth - * Virtual Ethernet interfaces. - * - * For each mode, the following tests are run: - * a. nopoll - soft-irq processing in run-to-completion mode - * b. poll - using poll() syscall - * c. Socket Teardown - * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy - * both sockets, then repeat multiple times. Only nopoll mode is used - * d. Bi-directional sockets - * Configure sockets as bi-directional tx/rx sockets, sets up fill and - * completion rings on each socket, tx/rx in both directions. Only nopoll - * mode is used - * e. Statistics - * Trigger some error conditions and ensure that the appropriate statistics - * are incremented. Within this test, the following statistics are tested: - * i. rx dropped - * Increase the UMEM frame headroom to a value which results in - * insufficient space in the rx buffer for both the packet and the headroom. - * ii. tx invalid - * Set the 'len' field of tx descriptors to an invalid value (umem frame - * size + 1). - * iii. rx ring full - * Reduce the size of the RX ring to a fraction of the fill ring size. - * iv. fill queue empty - * Do not populate the fill queue and then try to receive pkts. - * f. bpf_link resource persistence - * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0, - * then remove xsk sockets from queue 0 on both veth interfaces and - * finally run a traffic on queues ids 1 - * g. unaligned mode - * h. tests for invalid and corner case Tx descriptors so that the correct ones - * are discarded and let through, respectively. - * i. 
2K frame size tests - * - * Total tests: 12 - * - * Flow: - * ----- - * - Single process spawns two threads: Tx and Rx - * - Each of these two threads attach to a veth interface within their assigned - * namespaces - * - Each thread Creates one AF_XDP socket connected to a unique umem for each - * veth interface - * - Tx thread Transmits 10k packets from veth to veth - * - Rx thread verifies if all 10k packets were received and delivered in-order, - * and have the right content - * - * Enable/disable packet dump mode: - * -------------------------- - * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add - * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D") - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "xsk.h" -#include "xdpxceiver.h" -#include "../kselftest.h" - -/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. - * Until xdpxceiver is either moved or re-writed into libxdp, suppress - * deprecation warnings in this file - */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - -static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; -static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; -static const char *IP1 = "192.168.100.162"; -static const char *IP2 = "192.168.100.161"; -static const u16 UDP_PORT1 = 2020; -static const u16 UDP_PORT2 = 2121; - -static void __exit_with_error(int error, const char *file, const char *func, int line) -{ - ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, - strerror(error)); - ksft_exit_xfail(); -} - -#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) - -#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? 
"SKB" : "DRV" -#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" - -static void report_failure(struct test_spec *test) -{ - if (test->fail) - return; - - ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test), - test->name); - test->fail = true; -} - -static void memset32_htonl(void *dest, u32 val, u32 size) -{ - u32 *ptr = (u32 *)dest; - int i; - - val = htonl(val); - - for (i = 0; i < (size & (~0x3)); i += 4) - ptr[i >> 2] = val; -} - -/* - * Fold a partial checksum - * This function code has been taken from - * Linux kernel include/asm-generic/checksum.h - */ -static __u16 csum_fold(__u32 csum) -{ - u32 sum = (__force u32)csum; - - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __u16)~sum; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static u32 from64to32(u64 x) -{ - /* add up 32-bit and 32-bit for 32+c bit */ - x = (x & 0xffffffff) + (x >> 32); - /* add up carry.. 
*/ - x = (x & 0xffffffff) + (x >> 32); - return (u32)x; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - unsigned long long s = (__force u32)sum; - - s += (__force u32)saddr; - s += (__force u32)daddr; -#ifdef __BIG_ENDIAN__ - s += proto + len; -#else - s += (proto + len) << 8; -#endif - return (__force __u32)from64to32(s); -} - -/* - * This function has been taken from - * Linux kernel include/asm-generic/checksum.h - */ -static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); -} - -static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) -{ - u32 csum = 0; - u32 cnt = 0; - - /* udp hdr and data */ - for (; cnt < len; cnt += 2) - csum += udp_pkt[cnt >> 1]; - - return csum_tcpudp_magic(saddr, daddr, len, proto, csum); -} - -static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) -{ - memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_IP); -} - -static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) -{ - ip_hdr->version = IP_PKT_VER; - ip_hdr->ihl = 0x5; - ip_hdr->tos = IP_PKT_TOS; - ip_hdr->tot_len = htons(IP_PKT_SIZE); - ip_hdr->id = 0; - ip_hdr->frag_off = 0; - ip_hdr->ttl = IPDEFTTL; - ip_hdr->protocol = IPPROTO_UDP; - ip_hdr->saddr = ifobject->src_ip; - ip_hdr->daddr = ifobject->dst_ip; - ip_hdr->check = 0; -} - -static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, - struct udphdr *udp_hdr) -{ - udp_hdr->source = htons(ifobject->src_port); - udp_hdr->dest = htons(ifobject->dst_port); - udp_hdr->len = htons(UDP_PKT_SIZE); - memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); -} - -static void gen_udp_csum(struct udphdr *udp_hdr, struct 
iphdr *ip_hdr) -{ - udp_hdr->check = 0; - udp_hdr->check = - udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); -} - -static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) -{ - struct xsk_umem_config cfg = { - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, - .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, - .frame_size = umem->frame_size, - .frame_headroom = umem->frame_headroom, - .flags = XSK_UMEM__DEFAULT_FLAGS - }; - int ret; - - if (umem->unaligned_mode) - cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; - - ret = xsk_umem__create(&umem->umem, buffer, size, - &umem->fq, &umem->cq, &cfg); - if (ret) - return ret; - - umem->buffer = buffer; - return 0; -} - -static void enable_busy_poll(struct xsk_socket_info *xsk) -{ - int sock_opt; - - sock_opt = 1; - if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, - (void *)&sock_opt, sizeof(sock_opt)) < 0) - exit_with_error(errno); - - sock_opt = 20; - if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, - (void *)&sock_opt, sizeof(sock_opt)) < 0) - exit_with_error(errno); - - sock_opt = BATCH_SIZE; - if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, - (void *)&sock_opt, sizeof(sock_opt)) < 0) - exit_with_error(errno); -} - -static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, - struct ifobject *ifobject, bool shared) -{ - struct xsk_socket_config cfg = {}; - struct xsk_ring_cons *rxr; - struct xsk_ring_prod *txr; - - xsk->umem = umem; - cfg.rx_size = xsk->rxqsize; - cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; - cfg.xdp_flags = ifobject->xdp_flags; - cfg.bind_flags = ifobject->bind_flags; - if (shared) - cfg.bind_flags |= XDP_SHARED_UMEM; - - txr = ifobject->tx_on ? &xsk->tx : NULL; - rxr = ifobject->rx_on ? 
&xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg); -} - -static struct option long_options[] = { - {"interface", required_argument, 0, 'i'}, - {"busy-poll", no_argument, 0, 'b'}, - {"dump-pkts", no_argument, 0, 'D'}, - {"verbose", no_argument, 0, 'v'}, - {0, 0, 0, 0} -}; - -static void usage(const char *prog) -{ - const char *str = - " Usage: %s [OPTIONS]\n" - " Options:\n" - " -i, --interface Use interface\n" - " -D, --dump-pkts Dump packets L2 - L5\n" - " -v, --verbose Verbose output\n" - " -b, --busy-poll Enable busy poll\n"; - - ksft_print_msg(str, prog); -} - -static int switch_namespace(const char *nsname) -{ - char fqns[26] = "/var/run/netns/"; - int nsfd; - - if (!nsname || strlen(nsname) == 0) - return -1; - - strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1); - nsfd = open(fqns, O_RDONLY); - - if (nsfd == -1) - exit_with_error(errno); - - if (setns(nsfd, 0) == -1) - exit_with_error(errno); - - print_verbose("NS switched: %s\n", nsname); - - return nsfd; -} - -static bool validate_interface(struct ifobject *ifobj) -{ - if (!strcmp(ifobj->ifname, "")) - return false; - return true; -} - -static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc, - char **argv) -{ - struct ifobject *ifobj; - u32 interface_nb = 0; - int option_index, c; - - opterr = 0; - - for (;;) { - char *sptr, *token; - - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); - if (c == -1) - break; - - switch (c) { - case 'i': - if (interface_nb == 0) - ifobj = ifobj_tx; - else if (interface_nb == 1) - ifobj = ifobj_rx; - else - break; - - sptr = strndupa(optarg, strlen(optarg)); - memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); - token = strsep(&sptr, ","); - if (token) - memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS); - interface_nb++; - break; - case 'D': - opt_pkt_dump = true; - break; - case 'v': - opt_verbose = true; - break; - case 'b': - 
ifobj_tx->busy_poll = true; - ifobj_rx->busy_poll = true; - break; - default: - usage(basename(argv[0])); - ksft_exit_xfail(); - } - } -} - -static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx) -{ - u32 i, j; - - for (i = 0; i < MAX_INTERFACES; i++) { - struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; - - ifobj->xsk = &ifobj->xsk_arr[0]; - ifobj->use_poll = false; - ifobj->use_fill_ring = true; - ifobj->release_rx = true; - ifobj->pkt_stream = test->pkt_stream_default; - ifobj->validation_func = NULL; - - if (i == 0) { - ifobj->rx_on = false; - ifobj->tx_on = true; - } else { - ifobj->rx_on = true; - ifobj->tx_on = false; - } - - memset(ifobj->umem, 0, sizeof(*ifobj->umem)); - ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; - ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - - for (j = 0; j < MAX_SOCKETS; j++) { - memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); - ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; - } - } - - test->ifobj_tx = ifobj_tx; - test->ifobj_rx = ifobj_rx; - test->current_step = 0; - test->total_steps = 1; - test->nb_sockets = 1; - test->fail = false; -} - -static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx, enum test_mode mode) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = test->pkt_stream_default; - memset(test, 0, sizeof(*test)); - test->pkt_stream_default = pkt_stream; - - for (i = 0; i < MAX_INTERFACES; i++) { - struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - - ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; - if (mode == TEST_MODE_SKB) - ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE; - else - ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE; - - ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; - } - - __test_spec_init(test, ifobj_tx, ifobj_rx); -} - -static void test_spec_reset(struct test_spec *test) -{ - __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); -} - -static void test_spec_set_name(struct test_spec *test, const char *name) -{ - strncpy(test->name, name, MAX_TEST_NAME_SIZE); -} - -static void pkt_stream_reset(struct pkt_stream *pkt_stream) -{ - if (pkt_stream) - pkt_stream->rx_pkt_nb = 0; -} - -static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) -{ - if (pkt_nb >= pkt_stream->nb_pkts) - return NULL; - - return &pkt_stream->pkts[pkt_nb]; -} - -static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) -{ - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { - (*pkts_sent)++; - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; - pkt_stream->rx_pkt_nb++; - } - return NULL; -} - -static void pkt_stream_delete(struct pkt_stream *pkt_stream) -{ - free(pkt_stream->pkts); - free(pkt_stream); -} - -static void pkt_stream_restore_default(struct test_spec *test) -{ - struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; - - if (tx_pkt_stream != test->pkt_stream_default) { - pkt_stream_delete(test->ifobj_tx->pkt_stream); - test->ifobj_tx->pkt_stream = test->pkt_stream_default; - } - - if (test->ifobj_rx->pkt_stream != test->pkt_stream_default && - test->ifobj_rx->pkt_stream != tx_pkt_stream) - pkt_stream_delete(test->ifobj_rx->pkt_stream); - test->ifobj_rx->pkt_stream = test->pkt_stream_default; -} - -static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) -{ - struct pkt_stream *pkt_stream; - - pkt_stream = calloc(1, sizeof(*pkt_stream)); - if (!pkt_stream) - return NULL; - - 
pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); - if (!pkt_stream->pkts) { - free(pkt_stream); - return NULL; - } - - pkt_stream->nb_pkts = nb_pkts; - return pkt_stream; -} - -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) -{ - pkt->addr = addr; - pkt->len = len; - if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) - pkt->valid = false; - else - pkt->valid = true; -} - -static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = __pkt_stream_alloc(nb_pkts); - if (!pkt_stream) - exit_with_error(ENOMEM); - - pkt_stream->nb_pkts = nb_pkts; - for (i = 0; i < nb_pkts; i++) { - pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, - pkt_len); - pkt_stream->pkts[i].payload = i; - } - - return pkt_stream; -} - -static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, - struct pkt_stream *pkt_stream) -{ - return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); -} - -static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) -{ - struct pkt_stream *pkt_stream; - - pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); - test->ifobj_tx->pkt_stream = pkt_stream; - test->ifobj_rx->pkt_stream = pkt_stream; -} - -static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) -{ - struct xsk_umem_info *umem = test->ifobj_tx->umem; - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default); - for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], - (i % umem->num_frames) * umem->frame_size + offset, pkt_len); - - test->ifobj_tx->pkt_stream = pkt_stream; - test->ifobj_rx->pkt_stream = pkt_stream; -} - -static void pkt_stream_receive_half(struct test_spec *test) -{ - struct 
xsk_umem_info *umem = test->ifobj_rx->umem; - struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; - u32 i; - - test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, - pkt_stream->pkts[0].len); - pkt_stream = test->ifobj_rx->pkt_stream; - for (i = 1; i < pkt_stream->nb_pkts; i += 2) - pkt_stream->pkts[i].valid = false; -} - -static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) -{ - struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); - struct udphdr *udp_hdr; - struct ethhdr *eth_hdr; - struct iphdr *ip_hdr; - void *data; - - if (!pkt) - return NULL; - if (!pkt->valid || pkt->len < MIN_PKT_SIZE) - return pkt; - - data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); - udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); - ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); - eth_hdr = (struct ethhdr *)data; - - gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); - gen_ip_hdr(ifobject, ip_hdr); - gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr(ifobject, eth_hdr); - - return pkt; -} - -static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = __pkt_stream_alloc(nb_pkts); - if (!pkt_stream) - exit_with_error(ENOMEM); - - test->ifobj_tx->pkt_stream = pkt_stream; - test->ifobj_rx->pkt_stream = pkt_stream; - - for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = pkts[i].addr; - pkt_stream->pkts[i].len = pkts[i].len; - pkt_stream->pkts[i].payload = i; - pkt_stream->pkts[i].valid = pkts[i].valid; - } -} - -static void pkt_dump(void *pkt, u32 len) -{ - char s[INET_ADDRSTRLEN]; - struct ethhdr *ethhdr; - struct udphdr *udphdr; - struct iphdr *iphdr; - int payload, i; - - ethhdr = pkt; - iphdr = pkt + sizeof(*ethhdr); - udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); - - /*extract L2 frame */ - fprintf(stdout, "DEBUG>> L2: dst mac: "); - for (i = 0; i < ETH_ALEN; i++) - 
fprintf(stdout, "%02X", ethhdr->h_dest[i]); - - fprintf(stdout, "\nDEBUG>> L2: src mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_source[i]); - - /*extract L3 frame */ - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", - inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", - inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); - /*extract L4 frame */ - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); - /*extract L5 frame */ - payload = *((uint32_t *)(pkt + PKT_HDR_SIZE)); - - fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); - fprintf(stdout, "---------------------------------------\n"); -} - -static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, - u64 pkt_stream_addr) -{ - u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; - u32 offset = addr % umem->frame_size, expected_offset = 0; - - if (!pkt_stream->use_addr_for_fill) - pkt_stream_addr = 0; - - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; - - if (offset == expected_offset) - return true; - - ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); - return false; -} - -static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) -{ - void *data = xsk_umem__get_data(buffer, addr); - struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); - - if (!pkt) { - ksft_print_msg("[%s] too many packets received\n", __func__); - return false; - } - - if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { - /* Do not try to verify packets that are smaller than minimum size. 
*/ - return true; - } - - if (pkt->len != len) { - ksft_print_msg("[%s] expected length [%d], got length [%d]\n", - __func__, pkt->len, len); - return false; - } - - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { - u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); - - if (opt_pkt_dump) - pkt_dump(data, PKT_SIZE); - - if (pkt->payload != seqnum) { - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", - __func__, pkt->payload, seqnum); - return false; - } - } else { - ksft_print_msg("Invalid frame received: "); - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, - iphdr->tos); - return false; - } - - return true; -} - -static void kick_tx(struct xsk_socket_info *xsk) -{ - int ret; - - ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); - if (ret >= 0) - return; - if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { - usleep(100); - return; - } - exit_with_error(errno); -} - -static void kick_rx(struct xsk_socket_info *xsk) -{ - int ret; - - ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); - if (ret < 0) - exit_with_error(errno); -} - -static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) -{ - unsigned int rcvd; - u32 idx; - - if (xsk_ring_prod__needs_wakeup(&xsk->tx)) - kick_tx(xsk); - - rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); - if (rcvd) { - if (rcvd > xsk->outstanding_tx) { - u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); - - ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", addr); - return TEST_FAILURE; - } - - xsk_ring_cons__release(&xsk->umem->cq, rcvd); - xsk->outstanding_tx -= rcvd; - } - - return TEST_PASS; -} - -static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) -{ - struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0}; - u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; - 
struct pkt_stream *pkt_stream = ifobj->pkt_stream; - struct xsk_socket_info *xsk = ifobj->xsk; - struct xsk_umem_info *umem = xsk->umem; - struct pkt *pkt; - int ret; - - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - timeradd(&tv_now, &tv_timeout, &tv_end); - - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - while (pkt) { - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - if (timercmp(&tv_now, &tv_end, >)) { - ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); - return TEST_FAILURE; - } - - kick_rx(xsk); - - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - exit_with_error(-ret); - } - continue; - } - - if (ifobj->use_fill_ring) { - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { - if (ret < 0) - exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - exit_with_error(-ret); - } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - } - } - - for (i = 0; i < rcvd; i++) { - const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); - u64 addr = desc->addr, orig; - - orig = xsk_umem__extract_addr(addr); - addr = xsk_umem__add_offset_to_addr(addr); - - if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || - !is_offset_correct(umem, pkt_stream, addr, pkt->addr)) - return TEST_FAILURE; - - if (ifobj->use_fill_ring) - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - } - - if (ifobj->use_fill_ring) - xsk_ring_prod__submit(&umem->fq, rcvd); - if (ifobj->release_rx) - xsk_ring_cons__release(&xsk->rx, rcvd); - - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= pkts_sent; - if (pkts_in_flight < umem->num_frames) - pthread_cond_signal(&pacing_cond); - pthread_mutex_unlock(&pacing_mutex); 
- pkts_sent = 0; - } - - return TEST_PASS; -} - -static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) -{ - struct xsk_socket_info *xsk = ifobject->xsk; - u32 i, idx, valid_pkts = 0; - - while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) - complete_pkts(xsk, BATCH_SIZE); - - for (i = 0; i < BATCH_SIZE; i++) { - struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); - - if (!pkt) - break; - - tx_desc->addr = pkt->addr; - tx_desc->len = pkt->len; - (*pkt_nb)++; - if (pkt->valid) - valid_pkts++; - } - - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight += valid_pkts; - /* pkts_in_flight might be negative if many invalid packets are sent */ - if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { - kick_tx(xsk); - pthread_cond_wait(&pacing_cond, &pacing_mutex); - } - pthread_mutex_unlock(&pacing_mutex); - - xsk_ring_prod__submit(&xsk->tx, i); - xsk->outstanding_tx += valid_pkts; - if (complete_pkts(xsk, i)) - return TEST_FAILURE; - - usleep(10); - return TEST_PASS; -} - -static void wait_for_tx_completion(struct xsk_socket_info *xsk) -{ - while (xsk->outstanding_tx) - complete_pkts(xsk, BATCH_SIZE); -} - -static int send_pkts(struct test_spec *test, struct ifobject *ifobject) -{ - struct pollfd fds = { }; - u32 pkt_cnt = 0; - - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLOUT; - - while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - int err; - - if (ifobject->use_poll) { - int ret; - - ret = poll(&fds, 1, POLL_TMOUT); - if (ret <= 0) - continue; - - if (!(fds.revents & POLLOUT)) - continue; - } - - err = __send_pkts(ifobject, &pkt_cnt); - if (err || test->fail) - return TEST_FAILURE; - } - - wait_for_tx_completion(ifobject->xsk); - return TEST_PASS; -} - -static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) -{ - int fd = xsk_socket__fd(xsk), err; - socklen_t optlen, expected_len; - - optlen = sizeof(*stats); - 
err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); - if (err) { - ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return TEST_FAILURE; - } - - expected_len = sizeof(struct xdp_statistics); - if (optlen != expected_len) { - ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", - __func__, expected_len, optlen); - return TEST_FAILURE; - } - - return TEST_PASS; -} - -static int validate_rx_dropped(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - struct xdp_statistics stats; - int err; - - kick_rx(ifobject->xsk); - - err = get_xsk_stats(xsk, &stats); - if (err) - return TEST_FAILURE; - - if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2) - return TEST_PASS; - - return TEST_FAILURE; -} - -static int validate_rx_full(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - struct xdp_statistics stats; - int err; - - usleep(1000); - kick_rx(ifobject->xsk); - - err = get_xsk_stats(xsk, &stats); - if (err) - return TEST_FAILURE; - - if (stats.rx_ring_full) - return TEST_PASS; - - return TEST_FAILURE; -} - -static int validate_fill_empty(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - struct xdp_statistics stats; - int err; - - usleep(1000); - kick_rx(ifobject->xsk); - - err = get_xsk_stats(xsk, &stats); - if (err) - return TEST_FAILURE; - - if (stats.rx_fill_ring_empty_descs) - return TEST_PASS; - - return TEST_FAILURE; -} - -static int validate_tx_invalid_descs(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - int fd = xsk_socket__fd(xsk); - struct xdp_statistics stats; - socklen_t optlen; - int err; - - optlen = sizeof(stats); - err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); - if (err) { - ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return TEST_FAILURE; - } - - if (stats.tx_invalid_descs != 
ifobject->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); - return TEST_FAILURE; - } - - return TEST_PASS; -} - -static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) -{ - u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; - int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - LIBBPF_OPTS(bpf_xdp_query_opts, opts); - int ret, ifindex; - void *bufs; - u32 i; - - ifobject->ns_fd = switch_namespace(ifobject->nsname); - - if (ifobject->umem->unaligned_mode) - mmap_flags |= MAP_HUGETLB; - - bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); - - ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); - if (ret) - exit_with_error(-ret); - - for (i = 0; i < test->nb_sockets; i++) { - u32 ctr = 0; - - while (ctr++ < SOCK_RECONF_CTR) { - ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem, - ifobject, !!i); - if (!ret) - break; - - /* Retry if it fails as xsk_socket__create() is asynchronous */ - if (ctr >= SOCK_RECONF_CTR) - exit_with_error(-ret); - usleep(USLEEP_MAX); - } - - if (ifobject->busy_poll) - enable_busy_poll(&ifobject->xsk_arr[i]); - } - - ifobject->xsk = &ifobject->xsk_arr[0]; - - if (!ifobject->rx_on) - return; - - ifindex = if_nametoindex(ifobject->ifname); - if (!ifindex) - exit_with_error(errno); - - ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd); - if (ret) - exit_with_error(-ret); - - ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts); - if (ret) - exit_with_error(-ret); - - if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) { - if (opts.attach_mode != XDP_ATTACHED_SKB) { - ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n"); - exit_with_error(-EINVAL); - } - } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) { - if (opts.attach_mode != XDP_ATTACHED_DRV) { - 
ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n"); - exit_with_error(-EINVAL); - } - } - - ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); - if (ret) - exit_with_error(-ret); -} - -static void testapp_cleanup_xsk_res(struct ifobject *ifobj) -{ - print_verbose("Destroying socket\n"); - xsk_socket__delete(ifobj->xsk->xsk); - munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size); - xsk_umem__delete(ifobj->umem->umem); -} - -static void *worker_testapp_validate_tx(void *arg) -{ - struct test_spec *test = (struct test_spec *)arg; - struct ifobject *ifobject = test->ifobj_tx; - int err; - - if (test->current_step == 1) - thread_common_ops(test, ifobject); - - print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, - ifobject->ifname); - err = send_pkts(test, ifobject); - - if (!err && ifobject->validation_func) - err = ifobject->validation_func(ifobject); - if (err) - report_failure(test); - - if (test->total_steps == test->current_step || err) - testapp_cleanup_xsk_res(ifobject); - pthread_exit(NULL); -} - -static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) -{ - u32 idx = 0, i, buffers_to_fill; - int ret; - - if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) - buffers_to_fill = umem->num_frames; - else - buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; - - ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); - if (ret != buffers_to_fill) - exit_with_error(ENOSPC); - for (i = 0; i < buffers_to_fill; i++) { - u64 addr; - - if (pkt_stream->use_addr_for_fill) { - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); - - if (!pkt) - break; - addr = pkt->addr; - } else { - addr = i * umem->frame_size; - } - - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; - } - xsk_ring_prod__submit(&umem->fq, buffers_to_fill); -} - -static void *worker_testapp_validate_rx(void *arg) -{ - struct test_spec *test = (struct test_spec *)arg; - 
struct ifobject *ifobject = test->ifobj_rx; - struct pollfd fds = { }; - int err; - - if (test->current_step == 1) - thread_common_ops(test, ifobject); - - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); - - fds.fd = xsk_socket__fd(ifobject->xsk->xsk); - fds.events = POLLIN; - - pthread_barrier_wait(&barr); - - err = receive_pkts(ifobject, &fds); - - if (!err && ifobject->validation_func) - err = ifobject->validation_func(ifobject); - if (err) { - report_failure(test); - pthread_mutex_lock(&pacing_mutex); - pthread_cond_signal(&pacing_cond); - pthread_mutex_unlock(&pacing_mutex); - } - - if (test->total_steps == test->current_step || err) - testapp_cleanup_xsk_res(ifobject); - pthread_exit(NULL); -} - -static int testapp_validate_traffic(struct test_spec *test) -{ - struct ifobject *ifobj_tx = test->ifobj_tx; - struct ifobject *ifobj_rx = test->ifobj_rx; - pthread_t t0, t1; - - if (pthread_barrier_init(&barr, NULL, 2)) - exit_with_error(errno); - - test->current_step++; - pkt_stream_reset(ifobj_rx->pkt_stream); - pkts_in_flight = 0; - - /*Spawn RX thread */ - pthread_create(&t0, NULL, ifobj_rx->func_ptr, test); - - pthread_barrier_wait(&barr); - if (pthread_barrier_destroy(&barr)) - exit_with_error(errno); - - /*Spawn TX thread */ - pthread_create(&t1, NULL, ifobj_tx->func_ptr, test); - - pthread_join(t1, NULL); - pthread_join(t0, NULL); - - return !!test->fail; -} - -static void testapp_teardown(struct test_spec *test) -{ - int i; - - test_spec_set_name(test, "TEARDOWN"); - for (i = 0; i < MAX_TEARDOWN_ITER; i++) { - if (testapp_validate_traffic(test)) - return; - test_spec_reset(test); - } -} - -static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) -{ - thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; - struct ifobject *tmp_ifobj = (*ifobj1); - - (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; - (*ifobj2)->func_ptr = tmp_func_ptr; - - *ifobj1 = *ifobj2; - *ifobj2 = tmp_ifobj; -} - -static void testapp_bidi(struct test_spec 
*test) -{ - test_spec_set_name(test, "BIDIRECTIONAL"); - test->ifobj_tx->rx_on = true; - test->ifobj_rx->tx_on = true; - test->total_steps = 2; - if (testapp_validate_traffic(test)) - return; - - print_verbose("Switching Tx/Rx vectors\n"); - swap_directions(&test->ifobj_rx, &test->ifobj_tx); - testapp_validate_traffic(test); - - swap_directions(&test->ifobj_rx, &test->ifobj_tx); -} - -static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) -{ - int ret; - - xsk_socket__delete(ifobj_tx->xsk->xsk); - xsk_socket__delete(ifobj_rx->xsk->xsk); - ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; - ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; - - ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd); - if (ret) - exit_with_error(-ret); -} - -static void testapp_bpf_res(struct test_spec *test) -{ - test_spec_set_name(test, "BPF_RES"); - test->total_steps = 2; - test->nb_sockets = 2; - if (testapp_validate_traffic(test)) - return; - - swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); - testapp_validate_traffic(test); -} - -static void testapp_headroom(struct test_spec *test) -{ - test_spec_set_name(test, "UMEM_HEADROOM"); - test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; - testapp_validate_traffic(test); -} - -static void testapp_stats_rx_dropped(struct test_spec *test) -{ - test_spec_set_name(test, "STAT_RX_DROPPED"); - test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; - pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); - pkt_stream_receive_half(test); - test->ifobj_rx->validation_func = validate_rx_dropped; - testapp_validate_traffic(test); -} - -static void testapp_stats_tx_invalid_descs(struct test_spec *test) -{ - test_spec_set_name(test, "STAT_TX_INVALID"); - pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); - test->ifobj_tx->validation_func = validate_tx_invalid_descs; - testapp_validate_traffic(test); - - 
pkt_stream_restore_default(test); -} - -static void testapp_stats_rx_full(struct test_spec *test) -{ - test_spec_set_name(test, "STAT_RX_FULL"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); - - test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; - test->ifobj_rx->release_rx = false; - test->ifobj_rx->validation_func = validate_rx_full; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); -} - -static void testapp_stats_fill_empty(struct test_spec *test) -{ - test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); - - test->ifobj_rx->use_fill_ring = false; - test->ifobj_rx->validation_func = validate_fill_empty; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); -} - -/* Simple test */ -static bool hugepages_present(struct ifobject *ifobject) -{ - const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; - void *bufs; - - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); - if (bufs == MAP_FAILED) - return false; - - munmap(bufs, mmap_sz); - return true; -} - -static bool testapp_unaligned(struct test_spec *test) -{ - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return false; - } - - test_spec_set_name(test, "UNALIGNED_MODE"); - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - /* Let half of the packets straddle a buffer boundrary */ - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); - 
test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - return true; -} - -static void testapp_single_pkt(struct test_spec *test) -{ - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; - - pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); - pkt_stream_restore_default(test); -} - -static void testapp_invalid_desc(struct test_spec *test) -{ - struct pkt pkts[] = { - /* Zero packet address allowed */ - {0, PKT_SIZE, 0, true}, - /* Allowed packet */ - {0x1000, PKT_SIZE, 0, true}, - /* Straddling the start of umem */ - {-2, PKT_SIZE, 0, false}, - /* Packet too large */ - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, - /* After umem ends */ - {UMEM_SIZE, PKT_SIZE, 0, false}, - /* Straddle the end of umem */ - {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a page boundrary */ - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a 2K boundrary */ - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, - /* Valid packet for synch so that something is received */ - {0x4000, PKT_SIZE, 0, true}}; - - if (test->ifobj_tx->umem->unaligned_mode) { - /* Crossing a page boundrary allowed */ - pkts[6].valid = true; - } - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { - /* Crossing a 2K frame size boundrary not allowed */ - pkts[7].valid = false; - } - - pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); - pkt_stream_restore_default(test); -} - -static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - const char *dst_ip, const char *src_ip, const u16 dst_port, - const u16 src_port, thread_func_t func_ptr) -{ - struct in_addr ip; - - memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); - memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - - inet_aton(dst_ip, &ip); - ifobj->dst_ip = ip.s_addr; - - inet_aton(src_ip, &ip); - ifobj->src_ip = ip.s_addr; - - ifobj->dst_port = 
dst_port; - ifobj->src_port = src_port; - - ifobj->func_ptr = func_ptr; -} - -static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) -{ - switch (type) { - case TEST_TYPE_STATS_RX_DROPPED: - testapp_stats_rx_dropped(test); - break; - case TEST_TYPE_STATS_TX_INVALID_DESCS: - testapp_stats_tx_invalid_descs(test); - break; - case TEST_TYPE_STATS_RX_FULL: - testapp_stats_rx_full(test); - break; - case TEST_TYPE_STATS_FILL_EMPTY: - testapp_stats_fill_empty(test); - break; - case TEST_TYPE_TEARDOWN: - testapp_teardown(test); - break; - case TEST_TYPE_BIDI: - testapp_bidi(test); - break; - case TEST_TYPE_BPF_RES: - testapp_bpf_res(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION: - test_spec_set_name(test, "RUN_TO_COMPLETION"); - testapp_validate_traffic(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: - test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); - testapp_single_pkt(test); - break; - case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: - test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); - test->ifobj_tx->umem->frame_size = 2048; - test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - break; - case TEST_TYPE_POLL: - test->ifobj_tx->use_poll = true; - test->ifobj_rx->use_poll = true; - test_spec_set_name(test, "POLL"); - testapp_validate_traffic(test); - break; - case TEST_TYPE_ALIGNED_INV_DESC: - test_spec_set_name(test, "ALIGNED_INV_DESC"); - testapp_invalid_desc(test); - break; - case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: - test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); - test->ifobj_tx->umem->frame_size = 2048; - test->ifobj_rx->umem->frame_size = 2048; - testapp_invalid_desc(test); - break; - case TEST_TYPE_UNALIGNED_INV_DESC: - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } - test_spec_set_name(test, 
"UNALIGNED_INV_DESC"); - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - testapp_invalid_desc(test); - break; - case TEST_TYPE_UNALIGNED: - if (!testapp_unaligned(test)) - return; - break; - case TEST_TYPE_HEADROOM: - testapp_headroom(test); - break; - default: - break; - } - - if (!test->fail) - ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), - test->name); -} - -static struct ifobject *ifobject_create(void) -{ - struct ifobject *ifobj; - - ifobj = calloc(1, sizeof(struct ifobject)); - if (!ifobj) - return NULL; - - ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr)); - if (!ifobj->xsk_arr) - goto out_xsk_arr; - - ifobj->umem = calloc(1, sizeof(*ifobj->umem)); - if (!ifobj->umem) - goto out_umem; - - return ifobj; - -out_umem: - free(ifobj->xsk_arr); -out_xsk_arr: - free(ifobj); - return NULL; -} - -static void ifobject_delete(struct ifobject *ifobj) -{ - free(ifobj->umem); - free(ifobj->xsk_arr); - free(ifobj); -} - -int main(int argc, char **argv) -{ - struct pkt_stream *pkt_stream_default; - struct ifobject *ifobj_tx, *ifobj_rx; - u32 i, j, failed_tests = 0; - struct test_spec test; - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - ifobj_tx = ifobject_create(); - if (!ifobj_tx) - exit_with_error(ENOMEM); - ifobj_rx = ifobject_create(); - if (!ifobj_rx) - exit_with_error(ENOMEM); - - setlocale(LC_ALL, ""); - - parse_command_line(ifobj_tx, ifobj_rx, argc, argv); - - if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) { - usage(basename(argv[0])); - ksft_exit_xfail(); - } - - init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, - worker_testapp_validate_tx); - init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, - worker_testapp_validate_rx); - - test_spec_init(&test, ifobj_tx, ifobj_rx, 0); - pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); - if 
(!pkt_stream_default) - exit_with_error(ENOMEM); - test.pkt_stream_default = pkt_stream_default; - - ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); - - for (i = 0; i < TEST_MODE_MAX; i++) - for (j = 0; j < TEST_TYPE_MAX; j++) { - test_spec_init(&test, ifobj_tx, ifobj_rx, i); - run_pkt_test(&test, i, j); - usleep(USLEEP_MAX); - - if (test.fail) - failed_tests++; - } - - pkt_stream_delete(pkt_stream_default); - ifobject_delete(ifobj_tx); - ifobject_delete(ifobj_rx); - - if (failed_tests) - ksft_exit_fail(); - else - ksft_exit_pass(); -} diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h deleted file mode 100644 index 8f672b0fe0e1..000000000000 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ /dev/null @@ -1,172 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright(c) 2020 Intel Corporation. - */ - -#ifndef XDPXCEIVER_H_ -#define XDPXCEIVER_H_ - -#ifndef SOL_XDP -#define SOL_XDP 283 -#endif - -#ifndef AF_XDP -#define AF_XDP 44 -#endif - -#ifndef PF_XDP -#define PF_XDP AF_XDP -#endif - -#ifndef SO_BUSY_POLL_BUDGET -#define SO_BUSY_POLL_BUDGET 70 -#endif - -#ifndef SO_PREFER_BUSY_POLL -#define SO_PREFER_BUSY_POLL 69 -#endif - -#define TEST_PASS 0 -#define TEST_FAILURE -1 -#define MAX_INTERFACES 2 -#define MAX_INTERFACE_NAME_CHARS 7 -#define MAX_INTERFACES_NAMESPACE_CHARS 10 -#define MAX_SOCKETS 2 -#define MAX_TEST_NAME_SIZE 32 -#define MAX_TEARDOWN_ITER 10 -#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ - sizeof(struct udphdr)) -#define MIN_ETH_PKT_SIZE 64 -#define ETH_FCS_SIZE 4 -#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) -#define PKT_SIZE (MIN_PKT_SIZE) -#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) -#define IP_PKT_VER 0x4 -#define IP_PKT_TOS 0x9 -#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) -#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) -#define USLEEP_MAX 10000 -#define SOCK_RECONF_CTR 10 -#define BATCH_SIZE 64 -#define POLL_TMOUT 1000 
-#define RECV_TMOUT 3 -#define DEFAULT_PKT_CNT (4 * 1024) -#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) -#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) -#define RX_FULL_RXQSIZE 32 -#define UMEM_HEADROOM_TEST_SIZE 128 -#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) - -#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) - -enum test_mode { - TEST_MODE_SKB, - TEST_MODE_DRV, - TEST_MODE_MAX -}; - -enum test_type { - TEST_TYPE_RUN_TO_COMPLETION, - TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, - TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, - TEST_TYPE_POLL, - TEST_TYPE_UNALIGNED, - TEST_TYPE_ALIGNED_INV_DESC, - TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, - TEST_TYPE_UNALIGNED_INV_DESC, - TEST_TYPE_HEADROOM, - TEST_TYPE_TEARDOWN, - TEST_TYPE_BIDI, - TEST_TYPE_STATS_RX_DROPPED, - TEST_TYPE_STATS_TX_INVALID_DESCS, - TEST_TYPE_STATS_RX_FULL, - TEST_TYPE_STATS_FILL_EMPTY, - TEST_TYPE_BPF_RES, - TEST_TYPE_MAX -}; - -static bool opt_pkt_dump; -static bool opt_verbose; - -struct xsk_umem_info { - struct xsk_ring_prod fq; - struct xsk_ring_cons cq; - struct xsk_umem *umem; - u32 num_frames; - u32 frame_headroom; - void *buffer; - u32 frame_size; - bool unaligned_mode; -}; - -struct xsk_socket_info { - struct xsk_ring_cons rx; - struct xsk_ring_prod tx; - struct xsk_umem_info *umem; - struct xsk_socket *xsk; - u32 outstanding_tx; - u32 rxqsize; -}; - -struct pkt { - u64 addr; - u32 len; - u32 payload; - bool valid; -}; - -struct pkt_stream { - u32 nb_pkts; - u32 rx_pkt_nb; - struct pkt *pkts; - bool use_addr_for_fill; -}; - -struct ifobject; -typedef int (*validation_func_t)(struct ifobject *ifobj); -typedef void *(*thread_func_t)(void *arg); - -struct ifobject { - char ifname[MAX_INTERFACE_NAME_CHARS]; - char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; - struct xsk_socket_info *xsk; - struct xsk_socket_info *xsk_arr; - struct xsk_umem_info *umem; - thread_func_t func_ptr; - validation_func_t validation_func; - struct 
pkt_stream *pkt_stream; - int ns_fd; - int xsk_map_fd; - u32 dst_ip; - u32 src_ip; - u32 xdp_flags; - u32 bind_flags; - u16 src_port; - u16 dst_port; - bool tx_on; - bool rx_on; - bool use_poll; - bool busy_poll; - bool use_fill_ring; - bool release_rx; - u8 dst_mac[ETH_ALEN]; - u8 src_mac[ETH_ALEN]; -}; - -struct test_spec { - struct ifobject *ifobj_tx; - struct ifobject *ifobj_rx; - struct pkt_stream *pkt_stream_default; - u16 total_steps; - u16 current_step; - u16 nb_sockets; - bool fail; - char name[MAX_TEST_NAME_SIZE]; -}; - -pthread_barrier_t barr; -pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; - -int pkts_in_flight; - -#endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 684e813803ec..a0b71723a818 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -8,7 +8,7 @@ ksft_xfail=2 ksft_xpass=3 ksft_skip=4 -XSKOBJ=xdpxceiver +XSKOBJ=xskxceiver validate_root_exec() { @@ -77,7 +77,7 @@ validate_ip_utility() [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; } } -execxdpxceiver() +exec_xskxceiver() { if [[ $busy_poll -eq 1 ]]; then ARGS+="-b " diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c new file mode 100644 index 000000000000..74d56d971baf --- /dev/null +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -0,0 +1,1682 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2020 Intel Corporation. */ + +/* + * Some functions in this program are taken from + * Linux kernel samples/bpf/xdpsock* and modified + * for use. + * + * See test_xsk.sh for detailed information on test topology + * and prerequisite network setup. + * + * This test program contains two threads, each thread is single socket with + * a unique UMEM. 
It validates in-order packet delivery and packet content + * by sending packets to each other. + * + * Tests Information: + * ------------------ + * These selftests test AF_XDP SKB and Native/DRV modes using veth + * Virtual Ethernet interfaces. + * + * For each mode, the following tests are run: + * a. nopoll - soft-irq processing in run-to-completion mode + * b. poll - using poll() syscall + * c. Socket Teardown + * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy + * both sockets, then repeat multiple times. Only nopoll mode is used + * d. Bi-directional sockets + * Configure sockets as bi-directional tx/rx sockets, set up fill and + * completion rings on each socket, tx/rx in both directions. Only nopoll + * mode is used + * e. Statistics + * Trigger some error conditions and ensure that the appropriate statistics + * are incremented. Within this test, the following statistics are tested: + * i. rx dropped + * Increase the UMEM frame headroom to a value which results in + * insufficient space in the rx buffer for both the packet and the headroom. + * ii. tx invalid + * Set the 'len' field of tx descriptors to an invalid value (umem frame + * size + 1). + * iii. rx ring full + * Reduce the size of the RX ring to a fraction of the fill ring size. + * iv. fill queue empty + * Do not populate the fill queue and then try to receive pkts. + * f. bpf_link resource persistence + * Configure sockets at indexes 0 and 1, run traffic on queue id 0, + * then remove xsk sockets from queue 0 on both veth interfaces and + * finally run traffic on queue id 1 + * g. unaligned mode + * h. tests for invalid and corner case Tx descriptors so that the correct ones + * are discarded and let through, respectively. + * i. 
2K frame size tests + * + * Total tests: 12 + * + * Flow: + * ----- + * - Single process spawns two threads: Tx and Rx + * - Each of these two threads attach to a veth interface within their assigned + * namespaces + * - Each thread Creates one AF_XDP socket connected to a unique umem for each + * veth interface + * - Tx thread Transmits 10k packets from veth to veth + * - Rx thread verifies if all 10k packets were received and delivered in-order, + * and have the right content + * + * Enable/disable packet dump mode: + * -------------------------- + * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add + * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D") + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xsk.h" +#include "xskxceiver.h" +#include "../kselftest.h" + +/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. + * Until xskxceiver is either moved or re-writed into libxdp, suppress + * deprecation warnings in this file + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; +static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; +static const char *IP1 = "192.168.100.162"; +static const char *IP2 = "192.168.100.161"; +static const u16 UDP_PORT1 = 2020; +static const u16 UDP_PORT2 = 2121; + +static void __exit_with_error(int error, const char *file, const char *func, int line) +{ + ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, + strerror(error)); + ksft_exit_xfail(); +} + +#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) + +#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? 
"SKB" : "DRV" +#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" + +static void report_failure(struct test_spec *test) +{ + if (test->fail) + return; + + ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test), + test->name); + test->fail = true; +} + +static void memset32_htonl(void *dest, u32 val, u32 size) +{ + u32 *ptr = (u32 *)dest; + int i; + + val = htonl(val); + + for (i = 0; i < (size & (~0x3)); i += 4) + ptr[i >> 2] = val; +} + +/* + * Fold a partial checksum + * This function code has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static __u16 csum_fold(__u32 csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __u16)~sum; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. 
*/ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __u32)from64to32(s); +} + +/* + * This function has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) +{ + u32 csum = 0; + u32 cnt = 0; + + /* udp hdr and data */ + for (; cnt < len; cnt += 2) + csum += udp_pkt[cnt >> 1]; + + return csum_tcpudp_magic(saddr, daddr, len, proto, csum); +} + +static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) +{ + memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); +} + +static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) +{ + ip_hdr->version = IP_PKT_VER; + ip_hdr->ihl = 0x5; + ip_hdr->tos = IP_PKT_TOS; + ip_hdr->tot_len = htons(IP_PKT_SIZE); + ip_hdr->id = 0; + ip_hdr->frag_off = 0; + ip_hdr->ttl = IPDEFTTL; + ip_hdr->protocol = IPPROTO_UDP; + ip_hdr->saddr = ifobject->src_ip; + ip_hdr->daddr = ifobject->dst_ip; + ip_hdr->check = 0; +} + +static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, + struct udphdr *udp_hdr) +{ + udp_hdr->source = htons(ifobject->src_port); + udp_hdr->dest = htons(ifobject->dst_port); + udp_hdr->len = htons(UDP_PKT_SIZE); + memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); +} + +static void gen_udp_csum(struct udphdr *udp_hdr, struct 
iphdr *ip_hdr) +{ + udp_hdr->check = 0; + udp_hdr->check = + udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); +} + +static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) +{ + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = umem->frame_size, + .frame_headroom = umem->frame_headroom, + .flags = XSK_UMEM__DEFAULT_FLAGS + }; + int ret; + + if (umem->unaligned_mode) + cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; + + ret = xsk_umem__create(&umem->umem, buffer, size, + &umem->fq, &umem->cq, &cfg); + if (ret) + return ret; + + umem->buffer = buffer; + return 0; +} + +static void enable_busy_poll(struct xsk_socket_info *xsk) +{ + int sock_opt; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = BATCH_SIZE; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); +} + +static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, + struct ifobject *ifobject, bool shared) +{ + struct xsk_socket_config cfg = {}; + struct xsk_ring_cons *rxr; + struct xsk_ring_prod *txr; + + xsk->umem = umem; + cfg.rx_size = xsk->rxqsize; + cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; + cfg.xdp_flags = ifobject->xdp_flags; + cfg.bind_flags = ifobject->bind_flags; + if (shared) + cfg.bind_flags |= XDP_SHARED_UMEM; + + txr = ifobject->tx_on ? &xsk->tx : NULL; + rxr = ifobject->rx_on ? 
&xsk->rx : NULL; + return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg); +} + +static struct option long_options[] = { + {"interface", required_argument, 0, 'i'}, + {"busy-poll", no_argument, 0, 'b'}, + {"dump-pkts", no_argument, 0, 'D'}, + {"verbose", no_argument, 0, 'v'}, + {0, 0, 0, 0} +}; + +static void usage(const char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -i, --interface Use interface\n" + " -D, --dump-pkts Dump packets L2 - L5\n" + " -v, --verbose Verbose output\n" + " -b, --busy-poll Enable busy poll\n"; + + ksft_print_msg(str, prog); +} + +static int switch_namespace(const char *nsname) +{ + char fqns[26] = "/var/run/netns/"; + int nsfd; + + if (!nsname || strlen(nsname) == 0) + return -1; + + strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1); + nsfd = open(fqns, O_RDONLY); + + if (nsfd == -1) + exit_with_error(errno); + + if (setns(nsfd, 0) == -1) + exit_with_error(errno); + + print_verbose("NS switched: %s\n", nsname); + + return nsfd; +} + +static bool validate_interface(struct ifobject *ifobj) +{ + if (!strcmp(ifobj->ifname, "")) + return false; + return true; +} + +static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc, + char **argv) +{ + struct ifobject *ifobj; + u32 interface_nb = 0; + int option_index, c; + + opterr = 0; + + for (;;) { + char *sptr, *token; + + c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'i': + if (interface_nb == 0) + ifobj = ifobj_tx; + else if (interface_nb == 1) + ifobj = ifobj_rx; + else + break; + + sptr = strndupa(optarg, strlen(optarg)); + memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS); + token = strsep(&sptr, ","); + if (token) + memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS); + interface_nb++; + break; + case 'D': + opt_pkt_dump = true; + break; + case 'v': + opt_verbose = true; + break; + case 'b': + 
ifobj_tx->busy_poll = true; + ifobj_rx->busy_poll = true; + break; + default: + usage(basename(argv[0])); + ksft_exit_xfail(); + } + } +} + +static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx) +{ + u32 i, j; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; + + ifobj->xsk = &ifobj->xsk_arr[0]; + ifobj->use_poll = false; + ifobj->use_fill_ring = true; + ifobj->release_rx = true; + ifobj->pkt_stream = test->pkt_stream_default; + ifobj->validation_func = NULL; + + if (i == 0) { + ifobj->rx_on = false; + ifobj->tx_on = true; + } else { + ifobj->rx_on = true; + ifobj->tx_on = false; + } + + memset(ifobj->umem, 0, sizeof(*ifobj->umem)); + ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + + for (j = 0; j < MAX_SOCKETS; j++) { + memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); + ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + } + } + + test->ifobj_tx = ifobj_tx; + test->ifobj_rx = ifobj_rx; + test->current_step = 0; + test->total_steps = 1; + test->nb_sockets = 1; + test->fail = false; +} + +static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx, enum test_mode mode) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = test->pkt_stream_default; + memset(test, 0, sizeof(*test)); + test->pkt_stream_default = pkt_stream; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; + + ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + if (mode == TEST_MODE_SKB) + ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE; + else + ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE; + + ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY; + } + + __test_spec_init(test, ifobj_tx, ifobj_rx); +} + +static void test_spec_reset(struct test_spec *test) +{ + __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); +} + +static void test_spec_set_name(struct test_spec *test, const char *name) +{ + strncpy(test->name, name, MAX_TEST_NAME_SIZE); +} + +static void pkt_stream_reset(struct pkt_stream *pkt_stream) +{ + if (pkt_stream) + pkt_stream->rx_pkt_nb = 0; +} + +static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) +{ + if (pkt_nb >= pkt_stream->nb_pkts) + return NULL; + + return &pkt_stream->pkts[pkt_nb]; +} + +static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) +{ + while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + (*pkts_sent)++; + if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; + pkt_stream->rx_pkt_nb++; + } + return NULL; +} + +static void pkt_stream_delete(struct pkt_stream *pkt_stream) +{ + free(pkt_stream->pkts); + free(pkt_stream); +} + +static void pkt_stream_restore_default(struct test_spec *test) +{ + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; + + if (tx_pkt_stream != test->pkt_stream_default) { + pkt_stream_delete(test->ifobj_tx->pkt_stream); + test->ifobj_tx->pkt_stream = test->pkt_stream_default; + } + + if (test->ifobj_rx->pkt_stream != test->pkt_stream_default && + test->ifobj_rx->pkt_stream != tx_pkt_stream) + pkt_stream_delete(test->ifobj_rx->pkt_stream); + test->ifobj_rx->pkt_stream = test->pkt_stream_default; +} + +static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = calloc(1, sizeof(*pkt_stream)); + if (!pkt_stream) + return NULL; + + 
pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); + if (!pkt_stream->pkts) { + free(pkt_stream); + return NULL; + } + + pkt_stream->nb_pkts = nb_pkts; + return pkt_stream; +} + +static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) +{ + pkt->addr = addr; + pkt->len = len; + if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) + pkt->valid = false; + else + pkt->valid = true; +} + +static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = __pkt_stream_alloc(nb_pkts); + if (!pkt_stream) + exit_with_error(ENOMEM); + + pkt_stream->nb_pkts = nb_pkts; + for (i = 0; i < nb_pkts; i++) { + pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, + pkt_len); + pkt_stream->pkts[i].payload = i; + } + + return pkt_stream; +} + +static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem, + struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len); +} + +static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len); + test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_rx->pkt_stream = pkt_stream; +} + +static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default); + for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) + pkt_set(umem, &pkt_stream->pkts[i], + (i % umem->num_frames) * umem->frame_size + offset, pkt_len); + + test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_rx->pkt_stream = pkt_stream; +} + +static void pkt_stream_receive_half(struct test_spec *test) +{ + struct 
xsk_umem_info *umem = test->ifobj_rx->umem; + struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; + u32 i; + + test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + pkt_stream = test->ifobj_rx->pkt_stream; + for (i = 1; i < pkt_stream->nb_pkts; i += 2) + pkt_stream->pkts[i].valid = false; +} + +static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) +{ + struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); + struct udphdr *udp_hdr; + struct ethhdr *eth_hdr; + struct iphdr *ip_hdr; + void *data; + + if (!pkt) + return NULL; + if (!pkt->valid || pkt->len < MIN_PKT_SIZE) + return pkt; + + data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); + udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); + ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + eth_hdr = (struct ethhdr *)data; + + gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); + gen_ip_hdr(ifobject, ip_hdr); + gen_udp_csum(udp_hdr, ip_hdr); + gen_eth_hdr(ifobject, eth_hdr); + + return pkt; +} + +static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = __pkt_stream_alloc(nb_pkts); + if (!pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_tx->pkt_stream = pkt_stream; + test->ifobj_rx->pkt_stream = pkt_stream; + + for (i = 0; i < nb_pkts; i++) { + pkt_stream->pkts[i].addr = pkts[i].addr; + pkt_stream->pkts[i].len = pkts[i].len; + pkt_stream->pkts[i].payload = i; + pkt_stream->pkts[i].valid = pkts[i].valid; + } +} + +static void pkt_dump(void *pkt, u32 len) +{ + char s[INET_ADDRSTRLEN]; + struct ethhdr *ethhdr; + struct udphdr *udphdr; + struct iphdr *iphdr; + int payload, i; + + ethhdr = pkt; + iphdr = pkt + sizeof(*ethhdr); + udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); + + /*extract L2 frame */ + fprintf(stdout, "DEBUG>> L2: dst mac: "); + for (i = 0; i < ETH_ALEN; i++) + 
fprintf(stdout, "%02X", ethhdr->h_dest[i]); + + fprintf(stdout, "\nDEBUG>> L2: src mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_source[i]); + + /*extract L3 frame */ + fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); + fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", + inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); + fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", + inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); + /*extract L4 frame */ + fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); + fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); + /*extract L5 frame */ + payload = *((uint32_t *)(pkt + PKT_HDR_SIZE)); + + fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); + fprintf(stdout, "---------------------------------------\n"); +} + +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, + u64 pkt_stream_addr) +{ + u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; + u32 offset = addr % umem->frame_size, expected_offset = 0; + + if (!pkt_stream->use_addr_for_fill) + pkt_stream_addr = 0; + + expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + + if (offset == expected_offset) + return true; + + ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); + return false; +} + +static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) +{ + void *data = xsk_umem__get_data(buffer, addr); + struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + + if (!pkt) { + ksft_print_msg("[%s] too many packets received\n", __func__); + return false; + } + + if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { + /* Do not try to verify packets that are smaller than minimum size. 
*/ + return true; + } + + if (pkt->len != len) { + ksft_print_msg("[%s] expected length [%d], got length [%d]\n", + __func__, pkt->len, len); + return false; + } + + if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { + u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); + + if (opt_pkt_dump) + pkt_dump(data, PKT_SIZE); + + if (pkt->payload != seqnum) { + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", + __func__, pkt->payload, seqnum); + return false; + } + } else { + ksft_print_msg("Invalid frame received: "); + ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, + iphdr->tos); + return false; + } + + return true; +} + +static void kick_tx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret >= 0) + return; + if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { + usleep(100); + return; + } + exit_with_error(errno); +} + +static void kick_rx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (ret < 0) + exit_with_error(errno); +} + +static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) +{ + unsigned int rcvd; + u32 idx; + + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) + kick_tx(xsk); + + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); + if (rcvd) { + if (rcvd > xsk->outstanding_tx) { + u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); + + ksft_print_msg("[%s] Too many packets completed\n", __func__); + ksft_print_msg("Last completion address: %llx\n", addr); + return TEST_FAILURE; + } + + xsk_ring_cons__release(&xsk->umem->cq, rcvd); + xsk->outstanding_tx -= rcvd; + } + + return TEST_PASS; +} + +static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) +{ + struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0}; + u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; + 
struct pkt_stream *pkt_stream = ifobj->pkt_stream; + struct xsk_socket_info *xsk = ifobj->xsk; + struct xsk_umem_info *umem = xsk->umem; + struct pkt *pkt; + int ret; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + timeradd(&tv_now, &tv_timeout, &tv_end); + + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); + while (pkt) { + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + + kick_rx(xsk); + + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(-ret); + } + continue; + } + + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (ret < 0) + exit_with_error(-ret); + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(-ret); + } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + } + } + + for (i = 0; i < rcvd; i++) { + const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + u64 addr = desc->addr, orig; + + orig = xsk_umem__extract_addr(addr); + addr = xsk_umem__add_offset_to_addr(addr); + + if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || + !is_offset_correct(umem, pkt_stream, addr, pkt->addr)) + return TEST_FAILURE; + + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); + } + + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, rcvd); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, rcvd); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= pkts_sent; + if (pkts_in_flight < umem->num_frames) + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); 
+ pkts_sent = 0; + } + + return TEST_PASS; +} + +static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) +{ + struct xsk_socket_info *xsk = ifobject->xsk; + u32 i, idx, valid_pkts = 0; + + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) + complete_pkts(xsk, BATCH_SIZE); + + for (i = 0; i < BATCH_SIZE; i++) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); + struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); + + if (!pkt) + break; + + tx_desc->addr = pkt->addr; + tx_desc->len = pkt->len; + (*pkt_nb)++; + if (pkt->valid) + valid_pkts++; + } + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight += valid_pkts; + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { + kick_tx(xsk); + pthread_cond_wait(&pacing_cond, &pacing_mutex); + } + pthread_mutex_unlock(&pacing_mutex); + + xsk_ring_prod__submit(&xsk->tx, i); + xsk->outstanding_tx += valid_pkts; + if (complete_pkts(xsk, i)) + return TEST_FAILURE; + + usleep(10); + return TEST_PASS; +} + +static void wait_for_tx_completion(struct xsk_socket_info *xsk) +{ + while (xsk->outstanding_tx) + complete_pkts(xsk, BATCH_SIZE); +} + +static int send_pkts(struct test_spec *test, struct ifobject *ifobject) +{ + struct pollfd fds = { }; + u32 pkt_cnt = 0; + + fds.fd = xsk_socket__fd(ifobject->xsk->xsk); + fds.events = POLLOUT; + + while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { + int err; + + if (ifobject->use_poll) { + int ret; + + ret = poll(&fds, 1, POLL_TMOUT); + if (ret <= 0) + continue; + + if (!(fds.revents & POLLOUT)) + continue; + } + + err = __send_pkts(ifobject, &pkt_cnt); + if (err || test->fail) + return TEST_FAILURE; + } + + wait_for_tx_completion(ifobject->xsk); + return TEST_PASS; +} + +static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) +{ + int fd = xsk_socket__fd(xsk), err; + socklen_t optlen, expected_len; + + optlen = sizeof(*stats); + 
err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + expected_len = sizeof(struct xdp_statistics); + if (optlen != expected_len) { + ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", + __func__, expected_len, optlen); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int validate_rx_dropped(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_rx_full(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_ring_full) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_fill_empty(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_fill_ring_empty_descs) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_tx_invalid_descs(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + int fd = xsk_socket__fd(xsk); + struct xdp_statistics stats; + socklen_t optlen; + int err; + + optlen = sizeof(stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + if (stats.tx_invalid_descs != 
ifobject->pkt_stream->nb_pkts / 2) { + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", + __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) +{ + u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; + int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + int ret, ifindex; + void *bufs; + u32 i; + + ifobject->ns_fd = switch_namespace(ifobject->nsname); + + if (ifobject->umem->unaligned_mode) + mmap_flags |= MAP_HUGETLB; + + bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + exit_with_error(errno); + + ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + if (ret) + exit_with_error(-ret); + + for (i = 0; i < test->nb_sockets; i++) { + u32 ctr = 0; + + while (ctr++ < SOCK_RECONF_CTR) { + ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem, + ifobject, !!i); + if (!ret) + break; + + /* Retry if it fails as xsk_socket__create() is asynchronous */ + if (ctr >= SOCK_RECONF_CTR) + exit_with_error(-ret); + usleep(USLEEP_MAX); + } + + if (ifobject->busy_poll) + enable_busy_poll(&ifobject->xsk_arr[i]); + } + + ifobject->xsk = &ifobject->xsk_arr[0]; + + if (!ifobject->rx_on) + return; + + ifindex = if_nametoindex(ifobject->ifname); + if (!ifindex) + exit_with_error(errno); + + ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); + + ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts); + if (ret) + exit_with_error(-ret); + + if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) { + if (opts.attach_mode != XDP_ATTACHED_SKB) { + ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n"); + exit_with_error(-EINVAL); + } + } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) { + if (opts.attach_mode != XDP_ATTACHED_DRV) { + 
ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n"); + exit_with_error(-EINVAL); + } + } + + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); + if (ret) + exit_with_error(-ret); +} + +static void testapp_cleanup_xsk_res(struct ifobject *ifobj) +{ + print_verbose("Destroying socket\n"); + xsk_socket__delete(ifobj->xsk->xsk); + munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size); + xsk_umem__delete(ifobj->umem->umem); +} + +static void *worker_testapp_validate_tx(void *arg) +{ + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_tx; + int err; + + if (test->current_step == 1) + thread_common_ops(test, ifobject); + + print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, + ifobject->ifname); + err = send_pkts(test, ifobject); + + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) + report_failure(test); + + if (test->total_steps == test->current_step || err) + testapp_cleanup_xsk_res(ifobject); + pthread_exit(NULL); +} + +static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) +{ + u32 idx = 0, i, buffers_to_fill; + int ret; + + if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + buffers_to_fill = umem->num_frames; + else + buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; + + ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); + if (ret != buffers_to_fill) + exit_with_error(ENOSPC); + for (i = 0; i < buffers_to_fill; i++) { + u64 addr; + + if (pkt_stream->use_addr_for_fill) { + struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); + + if (!pkt) + break; + addr = pkt->addr; + } else { + addr = i * umem->frame_size; + } + + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + } + xsk_ring_prod__submit(&umem->fq, buffers_to_fill); +} + +static void *worker_testapp_validate_rx(void *arg) +{ + struct test_spec *test = (struct test_spec *)arg; + 
struct ifobject *ifobject = test->ifobj_rx; + struct pollfd fds = { }; + int err; + + if (test->current_step == 1) + thread_common_ops(test, ifobject); + + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); + + fds.fd = xsk_socket__fd(ifobject->xsk->xsk); + fds.events = POLLIN; + + pthread_barrier_wait(&barr); + + err = receive_pkts(ifobject, &fds); + + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) { + report_failure(test); + pthread_mutex_lock(&pacing_mutex); + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); + } + + if (test->total_steps == test->current_step || err) + testapp_cleanup_xsk_res(ifobject); + pthread_exit(NULL); +} + +static int testapp_validate_traffic(struct test_spec *test) +{ + struct ifobject *ifobj_tx = test->ifobj_tx; + struct ifobject *ifobj_rx = test->ifobj_rx; + pthread_t t0, t1; + + if (pthread_barrier_init(&barr, NULL, 2)) + exit_with_error(errno); + + test->current_step++; + pkt_stream_reset(ifobj_rx->pkt_stream); + pkts_in_flight = 0; + + /*Spawn RX thread */ + pthread_create(&t0, NULL, ifobj_rx->func_ptr, test); + + pthread_barrier_wait(&barr); + if (pthread_barrier_destroy(&barr)) + exit_with_error(errno); + + /*Spawn TX thread */ + pthread_create(&t1, NULL, ifobj_tx->func_ptr, test); + + pthread_join(t1, NULL); + pthread_join(t0, NULL); + + return !!test->fail; +} + +static void testapp_teardown(struct test_spec *test) +{ + int i; + + test_spec_set_name(test, "TEARDOWN"); + for (i = 0; i < MAX_TEARDOWN_ITER; i++) { + if (testapp_validate_traffic(test)) + return; + test_spec_reset(test); + } +} + +static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) +{ + thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; + struct ifobject *tmp_ifobj = (*ifobj1); + + (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; + (*ifobj2)->func_ptr = tmp_func_ptr; + + *ifobj1 = *ifobj2; + *ifobj2 = tmp_ifobj; +} + +static void testapp_bidi(struct test_spec 
*test) +{ + test_spec_set_name(test, "BIDIRECTIONAL"); + test->ifobj_tx->rx_on = true; + test->ifobj_rx->tx_on = true; + test->total_steps = 2; + if (testapp_validate_traffic(test)) + return; + + print_verbose("Switching Tx/Rx vectors\n"); + swap_directions(&test->ifobj_rx, &test->ifobj_tx); + testapp_validate_traffic(test); + + swap_directions(&test->ifobj_rx, &test->ifobj_tx); +} + +static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) +{ + int ret; + + xsk_socket__delete(ifobj_tx->xsk->xsk); + xsk_socket__delete(ifobj_rx->xsk->xsk); + ifobj_tx->xsk = &ifobj_tx->xsk_arr[1]; + ifobj_rx->xsk = &ifobj_rx->xsk_arr[1]; + + ret = xsk_socket__update_xskmap(ifobj_rx->xsk->xsk, ifobj_rx->xsk_map_fd); + if (ret) + exit_with_error(-ret); +} + +static void testapp_bpf_res(struct test_spec *test) +{ + test_spec_set_name(test, "BPF_RES"); + test->total_steps = 2; + test->nb_sockets = 2; + if (testapp_validate_traffic(test)) + return; + + swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); + testapp_validate_traffic(test); +} + +static void testapp_headroom(struct test_spec *test) +{ + test_spec_set_name(test, "UMEM_HEADROOM"); + test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; + testapp_validate_traffic(test); +} + +static void testapp_stats_rx_dropped(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_DROPPED"); + test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - + XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; + pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); + pkt_stream_receive_half(test); + test->ifobj_rx->validation_func = validate_rx_dropped; + testapp_validate_traffic(test); +} + +static void testapp_stats_tx_invalid_descs(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_TX_INVALID"); + pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); + test->ifobj_tx->validation_func = validate_tx_invalid_descs; + testapp_validate_traffic(test); + + 
pkt_stream_restore_default(test); +} + +static void testapp_stats_rx_full(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_FULL"); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, + DEFAULT_UMEM_BUFFERS, PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; + test->ifobj_rx->release_rx = false; + test->ifobj_rx->validation_func = validate_rx_full; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); +} + +static void testapp_stats_fill_empty(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, + DEFAULT_UMEM_BUFFERS, PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_rx->use_fill_ring = false; + test->ifobj_rx->validation_func = validate_fill_empty; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); +} + +/* Simple test */ +static bool hugepages_present(struct ifobject *ifobject) +{ + const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + if (bufs == MAP_FAILED) + return false; + + munmap(bufs, mmap_sz); + return true; +} + +static bool testapp_unaligned(struct test_spec *test) +{ + if (!hugepages_present(test->ifobj_tx)) { + ksft_test_result_skip("No 2M huge pages present.\n"); + return false; + } + + test_spec_set_name(test, "UNALIGNED_MODE"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* Let half of the packets straddle a buffer boundrary */ + pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); + 
test->ifobj_rx->pkt_stream->use_addr_for_fill = true; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); + return true; +} + +static void testapp_single_pkt(struct test_spec *test) +{ + struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; + + pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); + testapp_validate_traffic(test); + pkt_stream_restore_default(test); +} + +static void testapp_invalid_desc(struct test_spec *test) +{ + struct pkt pkts[] = { + /* Zero packet address allowed */ + {0, PKT_SIZE, 0, true}, + /* Allowed packet */ + {0x1000, PKT_SIZE, 0, true}, + /* Straddling the start of umem */ + {-2, PKT_SIZE, 0, false}, + /* Packet too large */ + {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + /* After umem ends */ + {UMEM_SIZE, PKT_SIZE, 0, false}, + /* Straddle the end of umem */ + {UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false}, + /* Straddle a page boundrary */ + {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, + /* Straddle a 2K boundrary */ + {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, + /* Valid packet for synch so that something is received */ + {0x4000, PKT_SIZE, 0, true}}; + + if (test->ifobj_tx->umem->unaligned_mode) { + /* Crossing a page boundrary allowed */ + pkts[6].valid = true; + } + if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundrary not allowed */ + pkts[7].valid = false; + } + + pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); + testapp_validate_traffic(test); + pkt_stream_restore_default(test); +} + +static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, + const char *dst_ip, const char *src_ip, const u16 dst_port, + const u16 src_port, thread_func_t func_ptr) +{ + struct in_addr ip; + + memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); + memcpy(ifobj->src_mac, src_mac, ETH_ALEN); + + inet_aton(dst_ip, &ip); + ifobj->dst_ip = ip.s_addr; + + inet_aton(src_ip, &ip); + ifobj->src_ip = ip.s_addr; + + ifobj->dst_port = 
dst_port; + ifobj->src_port = src_port; + + ifobj->func_ptr = func_ptr; +} + +static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) +{ + switch (type) { + case TEST_TYPE_STATS_RX_DROPPED: + testapp_stats_rx_dropped(test); + break; + case TEST_TYPE_STATS_TX_INVALID_DESCS: + testapp_stats_tx_invalid_descs(test); + break; + case TEST_TYPE_STATS_RX_FULL: + testapp_stats_rx_full(test); + break; + case TEST_TYPE_STATS_FILL_EMPTY: + testapp_stats_fill_empty(test); + break; + case TEST_TYPE_TEARDOWN: + testapp_teardown(test); + break; + case TEST_TYPE_BIDI: + testapp_bidi(test); + break; + case TEST_TYPE_BPF_RES: + testapp_bpf_res(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION: + test_spec_set_name(test, "RUN_TO_COMPLETION"); + testapp_validate_traffic(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: + test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); + testapp_single_pkt(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: + test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); + break; + case TEST_TYPE_POLL: + test->ifobj_tx->use_poll = true; + test->ifobj_rx->use_poll = true; + test_spec_set_name(test, "POLL"); + testapp_validate_traffic(test); + break; + case TEST_TYPE_ALIGNED_INV_DESC: + test_spec_set_name(test, "ALIGNED_INV_DESC"); + testapp_invalid_desc(test); + break; + case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: + test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + testapp_invalid_desc(test); + break; + case TEST_TYPE_UNALIGNED_INV_DESC: + if (!hugepages_present(test->ifobj_tx)) { + ksft_test_result_skip("No 2M huge pages present.\n"); + return; + } + test_spec_set_name(test, 
"UNALIGNED_INV_DESC"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + testapp_invalid_desc(test); + break; + case TEST_TYPE_UNALIGNED: + if (!testapp_unaligned(test)) + return; + break; + case TEST_TYPE_HEADROOM: + testapp_headroom(test); + break; + default: + break; + } + + if (!test->fail) + ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), + test->name); +} + +static struct ifobject *ifobject_create(void) +{ + struct ifobject *ifobj; + + ifobj = calloc(1, sizeof(struct ifobject)); + if (!ifobj) + return NULL; + + ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr)); + if (!ifobj->xsk_arr) + goto out_xsk_arr; + + ifobj->umem = calloc(1, sizeof(*ifobj->umem)); + if (!ifobj->umem) + goto out_umem; + + return ifobj; + +out_umem: + free(ifobj->xsk_arr); +out_xsk_arr: + free(ifobj); + return NULL; +} + +static void ifobject_delete(struct ifobject *ifobj) +{ + free(ifobj->umem); + free(ifobj->xsk_arr); + free(ifobj); +} + +int main(int argc, char **argv) +{ + struct pkt_stream *pkt_stream_default; + struct ifobject *ifobj_tx, *ifobj_rx; + u32 i, j, failed_tests = 0; + struct test_spec test; + + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + ifobj_tx = ifobject_create(); + if (!ifobj_tx) + exit_with_error(ENOMEM); + ifobj_rx = ifobject_create(); + if (!ifobj_rx) + exit_with_error(ENOMEM); + + setlocale(LC_ALL, ""); + + parse_command_line(ifobj_tx, ifobj_rx, argc, argv); + + if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) { + usage(basename(argv[0])); + ksft_exit_xfail(); + } + + init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, + worker_testapp_validate_tx); + init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, + worker_testapp_validate_rx); + + test_spec_init(&test, ifobj_tx, ifobj_rx, 0); + pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); + if 
(!pkt_stream_default) + exit_with_error(ENOMEM); + test.pkt_stream_default = pkt_stream_default; + + ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX); + + for (i = 0; i < TEST_MODE_MAX; i++) + for (j = 0; j < TEST_TYPE_MAX; j++) { + test_spec_init(&test, ifobj_tx, ifobj_rx, i); + run_pkt_test(&test, i, j); + usleep(USLEEP_MAX); + + if (test.fail) + failed_tests++; + } + + pkt_stream_delete(pkt_stream_default); + ifobject_delete(ifobj_tx); + ifobject_delete(ifobj_rx); + + if (failed_tests) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h new file mode 100644 index 000000000000..3d17053f98e5 --- /dev/null +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2020 Intel Corporation. + */ + +#ifndef XSKXCEIVER_H_ +#define XSKXCEIVER_H_ + +#ifndef SOL_XDP +#define SOL_XDP 283 +#endif + +#ifndef AF_XDP +#define AF_XDP 44 +#endif + +#ifndef PF_XDP +#define PF_XDP AF_XDP +#endif + +#ifndef SO_BUSY_POLL_BUDGET +#define SO_BUSY_POLL_BUDGET 70 +#endif + +#ifndef SO_PREFER_BUSY_POLL +#define SO_PREFER_BUSY_POLL 69 +#endif + +#define TEST_PASS 0 +#define TEST_FAILURE -1 +#define MAX_INTERFACES 2 +#define MAX_INTERFACE_NAME_CHARS 7 +#define MAX_INTERFACES_NAMESPACE_CHARS 10 +#define MAX_SOCKETS 2 +#define MAX_TEST_NAME_SIZE 32 +#define MAX_TEARDOWN_ITER 10 +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) +#define MIN_ETH_PKT_SIZE 64 +#define ETH_FCS_SIZE 4 +#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) +#define PKT_SIZE (MIN_PKT_SIZE) +#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) +#define IP_PKT_VER 0x4 +#define IP_PKT_TOS 0x9 +#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define USLEEP_MAX 10000 +#define SOCK_RECONF_CTR 10 +#define BATCH_SIZE 64 +#define POLL_TMOUT 1000 
+#define RECV_TMOUT 3 +#define DEFAULT_PKT_CNT (4 * 1024) +#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) +#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) +#define RX_FULL_RXQSIZE 32 +#define UMEM_HEADROOM_TEST_SIZE 128 +#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) + +#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) + +enum test_mode { + TEST_MODE_SKB, + TEST_MODE_DRV, + TEST_MODE_MAX +}; + +enum test_type { + TEST_TYPE_RUN_TO_COMPLETION, + TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, + TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, + TEST_TYPE_POLL, + TEST_TYPE_UNALIGNED, + TEST_TYPE_ALIGNED_INV_DESC, + TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, + TEST_TYPE_UNALIGNED_INV_DESC, + TEST_TYPE_HEADROOM, + TEST_TYPE_TEARDOWN, + TEST_TYPE_BIDI, + TEST_TYPE_STATS_RX_DROPPED, + TEST_TYPE_STATS_TX_INVALID_DESCS, + TEST_TYPE_STATS_RX_FULL, + TEST_TYPE_STATS_FILL_EMPTY, + TEST_TYPE_BPF_RES, + TEST_TYPE_MAX +}; + +static bool opt_pkt_dump; +static bool opt_verbose; + +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + u32 num_frames; + u32 frame_headroom; + void *buffer; + u32 frame_size; + bool unaligned_mode; +}; + +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + u32 outstanding_tx; + u32 rxqsize; +}; + +struct pkt { + u64 addr; + u32 len; + u32 payload; + bool valid; +}; + +struct pkt_stream { + u32 nb_pkts; + u32 rx_pkt_nb; + struct pkt *pkts; + bool use_addr_for_fill; +}; + +struct ifobject; +typedef int (*validation_func_t)(struct ifobject *ifobj); +typedef void *(*thread_func_t)(void *arg); + +struct ifobject { + char ifname[MAX_INTERFACE_NAME_CHARS]; + char nsname[MAX_INTERFACES_NAMESPACE_CHARS]; + struct xsk_socket_info *xsk; + struct xsk_socket_info *xsk_arr; + struct xsk_umem_info *umem; + thread_func_t func_ptr; + validation_func_t validation_func; + struct 
pkt_stream *pkt_stream; + int ns_fd; + int xsk_map_fd; + u32 dst_ip; + u32 src_ip; + u32 xdp_flags; + u32 bind_flags; + u16 src_port; + u16 dst_port; + bool tx_on; + bool rx_on; + bool use_poll; + bool busy_poll; + bool use_fill_ring; + bool release_rx; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; +}; + +struct test_spec { + struct ifobject *ifobj_tx; + struct ifobject *ifobj_rx; + struct pkt_stream *pkt_stream_default; + u16 total_steps; + u16 current_step; + u16 nb_sockets; + bool fail; + char name[MAX_TEST_NAME_SIZE]; +}; + +pthread_barrier_t barr; +pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; + +int pkts_in_flight; + +#endif /* XSKXCEIVER_H_ */ -- cgit v1.2.3-59-g8ed1b From 32e0d9b3104845e0b3f24d89033a17a317ba37f9 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Wed, 6 Jul 2022 21:28:55 +0000 Subject: selftests/bpf: Add test involving restrict type qualifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change adds a type based test involving the restrict type qualifier to the BPF selftests. On the btfgen path, this will verify that bpftool correctly handles the corresponding RESTRICT BTF kind. 
Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20220706212855.1700615-3-deso@posteo.net --- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 ++ tools/testing/selftests/bpf/progs/core_reloc_types.h | 8 ++++++-- tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c | 5 +++++ 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index a6f65e2236f4..c8655ba9a88f 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -764,6 +764,7 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_int_exists = 1, .typedef_enum_exists = 1, .typedef_void_ptr_exists = 1, + .typedef_restrict_ptr_exists = 1, .typedef_func_proto_exists = 1, .typedef_arr_exists = 1, @@ -777,6 +778,7 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_int_matches = 1, .typedef_enum_matches = 1, .typedef_void_ptr_matches = 1, + .typedef_restrict_ptr_matches = 1, .typedef_func_proto_matches = 1, .typedef_arr_matches = 1, diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 7ef91d19c66e..fd8e1b4c6762 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -874,6 +874,7 @@ struct core_reloc_type_based_output { bool typedef_int_exists; bool typedef_enum_exists; bool typedef_void_ptr_exists; + bool typedef_restrict_ptr_exists; bool typedef_func_proto_exists; bool typedef_arr_exists; @@ -887,6 +888,7 @@ struct core_reloc_type_based_output { bool typedef_int_matches; bool typedef_enum_matches; bool typedef_void_ptr_matches; + bool typedef_restrict_ptr_matches; bool typedef_func_proto_matches; bool typedef_arr_matches; @@ -939,6 +941,7 @@ 
typedef int int_typedef; typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; typedef void *void_ptr_typedef; +typedef int *restrict restrict_ptr_typedef; typedef int (*func_proto_typedef)(long); @@ -955,8 +958,9 @@ struct core_reloc_type_based { int_typedef f8; enum_typedef f9; void_ptr_typedef f10; - func_proto_typedef f11; - arr_typedef f12; + restrict_ptr_typedef f11; + func_proto_typedef f12; + arr_typedef f13; }; /* no types in target */ diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index d95bc08b75c1..2edb4df35e6e 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -51,6 +51,7 @@ typedef int int_typedef; typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; typedef void *void_ptr_typedef; +typedef int *restrict restrict_ptr_typedef; typedef int (*func_proto_typedef)(long); @@ -67,6 +68,7 @@ struct core_reloc_type_based_output { bool typedef_int_exists; bool typedef_enum_exists; bool typedef_void_ptr_exists; + bool typedef_restrict_ptr_exists; bool typedef_func_proto_exists; bool typedef_arr_exists; @@ -80,6 +82,7 @@ struct core_reloc_type_based_output { bool typedef_int_matches; bool typedef_enum_matches; bool typedef_void_ptr_matches; + bool typedef_restrict_ptr_matches; bool typedef_func_proto_matches; bool typedef_arr_matches; @@ -118,6 +121,7 @@ int test_core_type_based(void *ctx) out->typedef_int_exists = bpf_core_type_exists(int_typedef); out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef); out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); @@ -131,6 +135,7 @@ int test_core_type_based(void 
*ctx) out->typedef_int_matches = bpf_core_type_matches(int_typedef); out->typedef_enum_matches = bpf_core_type_matches(enum_typedef); out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef); + out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef); out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef); out->typedef_arr_matches = bpf_core_type_matches(arr_typedef); -- cgit v1.2.3-59-g8ed1b From d1a6edecc1fddfb6ef92c8f720631d2c02bf2744 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 8 Jul 2022 10:50:00 -0700 Subject: bpf: Check attach_func_proto more carefully in check_return_code Syzkaller reports the following crash: RIP: 0010:check_return_code kernel/bpf/verifier.c:10575 [inline] RIP: 0010:do_check kernel/bpf/verifier.c:12346 [inline] RIP: 0010:do_check_common+0xb3d2/0xd250 kernel/bpf/verifier.c:14610 With the following reproducer: bpf$PROG_LOAD_XDP(0x5, &(0x7f00000004c0)={0xd, 0x3, &(0x7f0000000000)=ANY=[@ANYBLOB="1800000000000019000000000000000095"], &(0x7f0000000300)='GPL\x00', 0x0, 0x0, 0x0, 0x0, 0x0, '\x00', 0x0, 0x2b, 0xffffffffffffffff, 0x8, 0x0, 0x0, 0x10, 0x0}, 0x80) Because we don't enforce expected_attach_type for XDP programs, we end up in hitting 'if (prog->expected_attach_type == BPF_LSM_CGROUP' part in check_return_code and follow up with testing `prog->aux->attach_func_proto->type`, but `prog->aux->attach_func_proto` is NULL. Add explicit prog_type check for the "Note, BPF_LSM_CGROUP that attach ..." condition. Also, don't skip return code check for LSM/STRUCT_OPS. The above actually brings an issue with existing selftest which tries to return EPERM from void inet_csk_clone. Fix the test (and move called_socket_clone to make sure it's not incremented in case of an error) and add a new one to explicitly verify this condition. 
Fixes: 69fd337a975c ("bpf: per-cgroup lsm flavor") Reported-by: syzbot+5cc0730bd4b4d2c5f152@syzkaller.appspotmail.com Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20220708175000.2603078-1-sdf@google.com --- kernel/bpf/verifier.c | 21 ++++++++++++++++----- tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c | 12 ++++++++++++ tools/testing/selftests/bpf/progs/lsm_cgroup.c | 12 ++++++------ .../selftests/bpf/progs/lsm_cgroup_nonvoid.c | 14 ++++++++++++++ 4 files changed, 48 insertions(+), 11 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c (limited to 'tools/testing') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index df3ec6b05f05..e3cf6194c24f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10444,11 +10444,21 @@ static int check_return_code(struct bpf_verifier_env *env) const bool is_subprog = frame->subprogno; /* LSM and struct_ops func-ptr's return type could be "void" */ - if (!is_subprog && - (prog_type == BPF_PROG_TYPE_STRUCT_OPS || - prog_type == BPF_PROG_TYPE_LSM) && - !prog->aux->attach_func_proto->type) - return 0; + if (!is_subprog) { + switch (prog_type) { + case BPF_PROG_TYPE_LSM: + if (prog->expected_attach_type == BPF_LSM_CGROUP) + /* See below, can be 0 or 0-1 depending on hook. */ + break; + fallthrough; + case BPF_PROG_TYPE_STRUCT_OPS: + if (!prog->aux->attach_func_proto->type) + return 0; + break; + default: + break; + } + } /* eBPF calling convention is such that R0 is used * to return the value from eBPF program. 
@@ -10572,6 +10582,7 @@ static int check_return_code(struct bpf_verifier_env *env) if (!tnum_in(range, reg->var_off)) { verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); if (prog->expected_attach_type == BPF_LSM_CGROUP && + prog_type == BPF_PROG_TYPE_LSM && !prog->aux->attach_func_proto->type) verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n"); return -EINVAL; diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index c542d7e80a5b..1102e4f42d2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -6,6 +6,7 @@ #include #include "lsm_cgroup.skel.h" +#include "lsm_cgroup_nonvoid.skel.h" #include "cgroup_helpers.h" #include "network_helpers.h" @@ -293,9 +294,20 @@ close_cgroup: lsm_cgroup__destroy(skel); } +static void test_lsm_cgroup_nonvoid(void) +{ + struct lsm_cgroup_nonvoid *skel = NULL; + + skel = lsm_cgroup_nonvoid__open_and_load(); + ASSERT_NULL(skel, "open succeeds"); + lsm_cgroup_nonvoid__destroy(skel); +} + void test_lsm_cgroup(void) { if (test__start_subtest("functional")) test_lsm_cgroup_functional(); + if (test__start_subtest("nonvoid")) + test_lsm_cgroup_nonvoid(); btf__free(btf); } diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c index 89f3b1e961a8..4f2d60b87b75 100644 --- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -156,25 +156,25 @@ int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req) { int prio = 234; - called_socket_clone++; - if (!newsk) return 1; /* Accepted request sockets get a different priority. */ if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) - return 0; /* EPERM */ + return 1; /* Make sure bpf_getsockopt is allowed and works. 
*/ prio = 0; if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) - return 0; /* EPERM */ + return 1; if (prio != 234) - return 0; /* EPERM */ + return 1; /* Can access cgroup local storage. */ if (!test_local_storage()) - return 0; /* EPERM */ + return 1; + + called_socket_clone++; return 1; } diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c new file mode 100644 index 000000000000..6cb0f161f417 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +SEC("lsm_cgroup/inet_csk_clone") +int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req) +{ + /* Can not return any errors from void LSM hooks. */ + return 0; +} -- cgit v1.2.3-59-g8ed1b From 24bdfdd2ec343c94adf38fb5bc699f12e543713b Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 8 Jul 2022 16:03:19 +0300 Subject: selftests/bpf: Fix xdp_synproxy build failure if CONFIG_NF_CONNTRACK=m/n When CONFIG_NF_CONNTRACK=m, struct bpf_ct_opts and enum member BPF_F_CURRENT_NETNS are not exposed. This commit allows building the xdp_synproxy selftest in such cases. Note that nf_conntrack must be loaded before running the test if it's compiled as a module. This commit also allows this selftest to be successfully compiled when CONFIG_NF_CONNTRACK is disabled. One unused local variable of type struct bpf_ct_opts is also removed. 
Fixes: fb5cd0ce70d4 ("selftests/bpf: Add selftests for raw syncookie helpers") Reported-by: Yauheni Kaliuta Signed-off-by: Maxim Mikityanskiy Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220708130319.1016294-1-maximmi@nvidia.com --- .../selftests/bpf/progs/xdp_synproxy_kern.c | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 9fd62e94b5e6..736686e903f6 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -77,16 +77,30 @@ struct { __uint(max_entries, MAX_ALLOWED_PORTS); } allowed_ports SEC(".maps"); +/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in + * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally. + */ + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 dir; + u8 reserved[2]; +} __attribute__((preserve_access_index)); + +#define BPF_F_CURRENT_NETNS (-1) + extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, __u32 len_tuple, - struct bpf_ct_opts *opts, + struct bpf_ct_opts___local *opts, __u32 len_opts) __ksym; extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, u32 len_tuple, - struct bpf_ct_opts *opts, + struct bpf_ct_opts___local *opts, u32 len_opts) __ksym; extern void bpf_ct_release(struct nf_conn *ct) __ksym; @@ -393,7 +407,7 @@ static __always_inline int tcp_dissect(void *data, void *data_end, static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp) { - struct bpf_ct_opts ct_lookup_opts = { + struct bpf_ct_opts___local ct_lookup_opts = { .netns_id = BPF_F_CURRENT_NETNS, .l4proto = IPPROTO_TCP, }; @@ -714,10 +728,6 @@ static __always_inline int syncookie_handle_ack(struct 
header_pointers *hdr) static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end, struct header_pointers *hdr, bool xdp) { - struct bpf_ct_opts ct_lookup_opts = { - .netns_id = BPF_F_CURRENT_NETNS, - .l4proto = IPPROTO_TCP, - }; int ret; ret = tcp_dissect(data, data_end, hdr); -- cgit v1.2.3-59-g8ed1b From d0d9c8f2df60c6d1495201981f4b424628601113 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 8 Jul 2022 10:14:09 -0700 Subject: selftests: mptcp: tweak simult_flows for debug kernels The mentioned test measures the transfer run-time to verify that the user-space program is able to use the full aggregate B/W. Even on (virtual) link-speed-bound tests, debug kernel can slow down the transfer enough to cause sporadic test failures. Instead of unconditionally raising the maximum allowed run-time, tweak when the running kernel is a debug one, and use some simple/ rough heuristic to guess such scenarios. Note: this intentionally avoids looking for /boot/config- as the latter file is not always available in our reference CI environments. Signed-off-by: Paolo Abeni Co-developed-by: Mat Martineau Signed-off-by: Mat Martineau Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/mptcp/simult_flows.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f441ff7904fc..ffa13a957a36 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -12,6 +12,7 @@ timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 ret=0 bail=0 +slack=50 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ]" @@ -52,6 +53,7 @@ setup() cout=$(mktemp) capout=$(mktemp) size=$((2 * 2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 @@ -104,6 +106,16 @@ setup() ip -net "$ns3" route add default via dead:beef:3::2 ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 + + # debug build can slow down measurably the test program + # we use quite tight time limit on the run-time, to ensure + # maximum B/W usage. + # Use kmemleak/lockdep/kasan/prove_locking presence as a rough + # estimate for this being a debug kernel and increase the + # maximum run-time accordingly. Observed run times for CI builds + # running selftests, including kbuild, were used to determine the + # amount of time to add. 
+ grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } # $1: ns, $2: port @@ -241,7 +253,7 @@ run_test() # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack - time=$((time + 450)) + time=$((time + 400 + slack)) printf "%-60s" "$msg" do_transfer $small $large $time -- cgit v1.2.3-59-g8ed1b From 97040cf9806e1163af29a3e24f2b8c5584aa44dd Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jul 2022 10:14:10 -0700 Subject: selftests: mptcp: userspace pm address tests This patch adds userspace pm tests support for mptcp_join.sh script. Add userspace pm add_addr and rm_addr test cases in userspace_tests(). Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 49 ++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a4406b7a8064..d889e7507cd9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -455,6 +455,12 @@ wait_mpj() done } +kill_wait() +{ + kill $1 > /dev/null 2>&1 + wait $1 2>/dev/null +} + pm_nl_set_limits() { local ns=$1 @@ -654,6 +660,9 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid + local userspace_pm=0 + local evts_ns1 + local evts_ns1_pid :> "$cout" :> "$sout" @@ -690,12 +699,24 @@ do_transfer() extra_args="-r ${speed:6}" fi + if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then + userspace_pm=1 + addr_nr_ns1=${addr_nr_ns1:10} + fi + if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then # disconnect extra_args="$extra_args -I ${addr_nr_ns2:10}" addr_nr_ns2=0 fi + if [ $userspace_pm -eq 1 ]; then + evts_ns1=$(mktemp) + :> "$evts_ns1" + ip netns exec 
${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + fi + local local_addr if is_v6 "${connect_addr}"; then local_addr="::" @@ -748,6 +769,8 @@ do_transfer() if [ $addr_nr_ns1 -gt 0 ]; then local counter=2 local add_nr_ns1=${addr_nr_ns1} + local id=10 + local tk while [ $add_nr_ns1 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -755,9 +778,18 @@ do_transfer() else addr="10.0.$counter.1" fi - pm_nl_add_endpoint $ns1 $addr flags signal + if [ $userspace_pm -eq 0 ]; then + pm_nl_add_endpoint $ns1 $addr flags signal + else + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id + sleep 1 + ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id + fi + counter=$((counter + 1)) add_nr_ns1=$((add_nr_ns1 - 1)) + id=$((id + 1)) done elif [ $addr_nr_ns1 -lt 0 ]; then local rm_nr_ns1=$((-addr_nr_ns1)) @@ -890,6 +922,11 @@ do_transfer() kill $cappid fi + if [ $userspace_pm -eq 1 ]; then + kill_wait $evts_ns1_pid + rm -rf $evts_ns1 + fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -2810,6 +2847,16 @@ userspace_tests() chk_join_nr 0 0 0 chk_rm_nr 0 0 fi + + # userspace pm add & remove address + if reset "userspace pm add & remove address"; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + fi } endpoint_tests() -- cgit v1.2.3-59-g8ed1b From 5e986ec468745e8d4582070d869a10eaae8ba56c Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jul 2022 10:14:11 -0700 Subject: selftests: mptcp: userspace pm subflow tests This patch adds userspace pm subflow tests support for mptcp_join.sh script. Add userspace pm create subflow and destroy test cases in userspace_tests(). 
Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 39 +++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d889e7507cd9..55efe2aafb84 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -663,6 +663,8 @@ do_transfer() local userspace_pm=0 local evts_ns1 local evts_ns1_pid + local evts_ns2 + local evts_ns2_pid :> "$cout" :> "$sout" @@ -708,13 +710,20 @@ do_transfer() # disconnect extra_args="$extra_args -I ${addr_nr_ns2:10}" addr_nr_ns2=0 + elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then + userspace_pm=1 + addr_nr_ns2=${addr_nr_ns2:10} fi if [ $userspace_pm -eq 1 ]; then evts_ns1=$(mktemp) + evts_ns2=$(mktemp) :> "$evts_ns1" + :> "$evts_ns2" ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & evts_ns1_pid=$! + ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! 
fi local local_addr @@ -836,6 +845,8 @@ do_transfer() if [ $addr_nr_ns2 -gt 0 ]; then local add_nr_ns2=${addr_nr_ns2} local counter=3 + local id=20 + local tk da dp sp while [ $add_nr_ns2 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -843,9 +854,23 @@ do_transfer() else addr="10.0.$counter.2" fi - pm_nl_add_endpoint $ns2 $addr flags $flags + if [ $userspace_pm -eq 0 ]; then + pm_nl_add_endpoint $ns2 $addr flags $flags + else + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \ + rip $da rport $dp token $tk + sleep 1 + sp=$(grep "type:10" "$evts_ns2" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ + rip $da rport $dp token $tk + fi counter=$((counter + 1)) add_nr_ns2=$((add_nr_ns2 - 1)) + id=$((id + 1)) done elif [ $addr_nr_ns2 -lt 0 ]; then local rm_nr_ns2=$((-addr_nr_ns2)) @@ -924,7 +949,8 @@ do_transfer() if [ $userspace_pm -eq 1 ]; then kill_wait $evts_ns1_pid - rm -rf $evts_ns1 + kill_wait $evts_ns2_pid + rm -rf $evts_ns1 $evts_ns2 fi NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ @@ -2857,6 +2883,15 @@ userspace_tests() chk_add_nr 1 1 chk_rm_nr 1 1 invert fi + + # userspace pm create destroy subflow + if reset "userspace pm create destroy subflow"; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow + chk_join_nr 1 1 1 + chk_rm_nr 0 1 + fi } endpoint_tests() -- cgit v1.2.3-59-g8ed1b From 507719cd7c0f0251fb2b772e73e4c35b7429587b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jul 2022 10:14:12 -0700 Subject: selftests: mptcp: avoid Terminated messages in userspace_pm There're some 'Terminated' messages in the output of userspace pm tests script after killing 
'./pm_nl_ctl events' processes: Created network namespaces ns1, ns2 [OK] ./userspace_pm.sh: line 166: 13735 Terminated ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 ./userspace_pm.sh: line 172: 13737 Terminated ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 Established IPv4 MPTCP Connection ns2 => ns1 [OK] ./userspace_pm.sh: line 166: 13753 Terminated ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 ./userspace_pm.sh: line 172: 13755 Terminated ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 Established IPv6 MPTCP Connection ns2 => ns1 [OK] ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token [OK] This patch adds a helper kill_wait(), in it using 'wait $pid 2>/dev/null' commands after 'kill $pid' to avoid printing out these Terminated messages. Use this helper instead of using 'kill $pid'. Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 40 +++++++++++++---------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index abe3d4ebe554..3229725b64b0 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -37,6 +37,12 @@ rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" +kill_wait() +{ + kill $1 > /dev/null 2>&1 + wait $1 2>/dev/null +} + cleanup() { echo "cleanup" @@ -48,16 +54,16 @@ cleanup() kill -SIGUSR1 $client4_pid > /dev/null 2>&1 fi if [ $server4_pid -ne 0 ]; then - kill $server4_pid > /dev/null 2>&1 + kill_wait $server4_pid fi if [ $client6_pid -ne 0 ]; then kill -SIGUSR1 $client6_pid > /dev/null 2>&1 fi if [ $server6_pid -ne 0 ]; then - kill $server6_pid > /dev/null 2>&1 + kill_wait $server6_pid fi if [ $evts_pid -ne 0 ]; then - kill 
$evts_pid > /dev/null 2>&1 + kill_wait $evts_pid fi local netns for netns in "$ns1" "$ns2" ;do @@ -153,7 +159,7 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill $client_evts_pid + kill_wait $client_evts_pid local client_token local client_port @@ -165,7 +171,7 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill $server_evts_pid + kill_wait $server_evts_pid server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$server_evts") @@ -286,7 +292,7 @@ test_announce() verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -321,7 +327,7 @@ test_announce() verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } @@ -416,7 +422,7 @@ test_remove() sleep 0.5 verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -449,7 +455,7 @@ test_remove() sleep 0.5 verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } @@ -553,7 +559,7 @@ test_subflows() "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid local sport sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -592,7 +598,7 @@ test_subflows() 
"$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -631,7 +637,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -647,7 +653,7 @@ test_subflows() ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ "$client4_token" > /dev/null 2>&1 - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -674,7 +680,7 @@ test_subflows() "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid> /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -713,7 +719,7 @@ test_subflows() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -750,7 +756,7 @@ test_subflows() "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -766,7 +772,7 @@ test_subflows() ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ "$server4_token" > /dev/null 2>&1 - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } -- cgit v1.2.3-59-g8ed1b From 65ebc6676d17847dde26dcbe50627a2fe198ca4b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 8 Jul 2022 10:14:13 -0700 Subject: selftests: mptcp: 
update pm_nl_ctl usage header The usage header of pm_nl_ctl command doesn't match with the context. So this patch adds the missing userspace PM keywords 'ann', 'rem', 'csf', 'dsf', 'events' and 'listen' in it. Reviewed-by: Mat Martineau Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index cb79f0719e3b..abddf4c63e79 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -31,7 +31,7 @@ static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|set|del|flush|dump|accept []\n", argv[0]); + fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept []\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id ] [dev ] \n"); fprintf(stderr, "\tann id token [port ] [dev ]\n"); fprintf(stderr, "\trem id token \n"); -- cgit v1.2.3-59-g8ed1b From 1d55f20313853b09cd111b04bb264ca22e1d6046 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 8 Jul 2022 19:52:55 -0700 Subject: selftests: tls: add test for NoPad getsockopt Make sure setsockopt / getsockopt behave as expected. 
Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 51 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e71ec5846be9..dc26aae0feb0 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1674,6 +1674,57 @@ TEST(keysizes) { close(cfd); } +TEST(no_pad) { + struct tls12_crypto_info_aes_gcm_256 tls12; + int ret, fd, cfd, val; + socklen_t len; + bool notls; + + memset(&tls12, 0, sizeof(tls12)); + tls12.info.version = TLS_1_3_VERSION; + tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + val = 1; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 1); + EXPECT_EQ(len, 4); + + val = 0; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 0); + EXPECT_EQ(len, 4); + + close(fd); + close(cfd); +} + TEST(tls_v6ops) { struct tls_crypto_info_keys tls12; struct sockaddr_in6 addr, addr2; -- cgit v1.2.3-59-g8ed1b From a9d2fae89fa8eb638203d8a4da435c647c12dfa3 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Tue, 12 Jul 2022 13:31:45 +0100 Subject: selftests/bpf: add a ksym iter subtest add subtest verifying BPF ksym iter behaviour. The BPF ksym iter program shows an example of dumping a format different to /proc/kallsyms.
It adds KIND and MAX_SIZE fields which represent the kind of symbol (core kernel, module, ftrace, bpf, or kprobe) and the maximum size the symbol can be. The latter is calculated from the difference between current symbol value and the next symbol value. The key benefit for this iterator will likely be supporting in-kernel data-gathering rather than dumping symbol details to userspace and parsing the results. Signed-off-by: Alan Maguire Acked-by: Yonghong Song Link: https://lore.kernel.org/r/1657629105-7812-3-git-send-email-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_iter.c | 16 +++++ tools/testing/selftests/bpf/progs/bpf_iter.h | 7 +++ tools/testing/selftests/bpf/progs/bpf_iter_ksym.c | 74 +++++++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ksym.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7ff5fa93d056..a33874b081b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -27,6 +27,7 @@ #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" #include "bpf_iter_bpf_link.skel.h" +#include "bpf_iter_ksym.skel.h" static int duration; @@ -1120,6 +1121,19 @@ static void test_link_iter(void) bpf_iter_bpf_link__destroy(skel); } +static void test_ksym_iter(void) +{ + struct bpf_iter_ksym *skel; + + skel = bpf_iter_ksym__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_ksym); + + bpf_iter_ksym__destroy(skel); +} + #define CMP_BUFFER_SIZE 1024 static char task_vma_output[CMP_BUFFER_SIZE]; static char proc_maps_output[CMP_BUFFER_SIZE]; @@ -1267,4 +1281,6 @@ void test_bpf_iter(void) test_buf_neg_offset(); if (test__start_subtest("link-iter")) test_link_iter(); + if 
(test__start_subtest("ksym")) + test_ksym_iter(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 97ec8bc76ae6..e9846606690d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -22,6 +22,7 @@ #define BTF_F_NONAME BTF_F_NONAME___not_used #define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used #define BTF_F_ZERO BTF_F_ZERO___not_used +#define bpf_iter__ksym bpf_iter__ksym___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -44,6 +45,7 @@ #undef BTF_F_NONAME #undef BTF_F_PTR_RAW #undef BTF_F_ZERO +#undef bpf_iter__ksym struct bpf_iter_meta { struct seq_file *seq; @@ -151,3 +153,8 @@ enum { BTF_F_PTR_RAW = (1ULL << 2), BTF_F_ZERO = (1ULL << 3), }; + +struct bpf_iter__ksym { + struct bpf_iter_meta *meta; + struct kallsym_iter *ksym; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c new file mode 100644 index 000000000000..285c008cbf9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include + +char _license[] SEC("license") = "GPL"; + +unsigned long last_sym_value = 0; + +static inline char tolower(char c) +{ + if (c >= 'A' && c <= 'Z') + c += ('a' - 'A'); + return c; +} + +static inline char toupper(char c) +{ + if (c >= 'a' && c <= 'z') + c -= ('a' - 'A'); + return c; +} + +/* Dump symbols with max size; the latter is calculated by caching symbol N value + * and when iterating on symbol N+1, we can print max size of symbol N via + * address of N+1 - address of N. 
+ */ +SEC("iter/ksym") +int dump_ksym(struct bpf_iter__ksym *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct kallsym_iter *iter = ctx->ksym; + __u32 seq_num = ctx->meta->seq_num; + unsigned long value; + char type; + int ret; + + if (!iter) + return 0; + + if (seq_num == 0) { + BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n"); + return 0; + } + if (last_sym_value) + BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value); + else + BPF_SEQ_PRINTF(seq, "\n"); + + value = iter->show_value ? iter->value : 0; + + last_sym_value = value; + + type = iter->type; + + if (iter->module_name[0]) { + type = iter->exported ? toupper(type) : tolower(type); + BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", + value, type, iter->name, iter->module_name); + } else { + BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name); + } + if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos) + BPF_SEQ_PRINTF(seq, "CORE "); + else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "MOD "); + else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "FTRACE_MOD "); + else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos) + BPF_SEQ_PRINTF(seq, "BPF "); + else + BPF_SEQ_PRINTF(seq, "KPROBE "); + return 0; +} -- cgit v1.2.3-59-g8ed1b From 914f6a59b10f41a8baf62d625087e6586d4762af Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 11 Jul 2022 12:16:33 -0700 Subject: selftests: mptcp: add MPC backup tests Add a couple of test-cases covering the newly introduced features - priority update for the MPC subflow. 
Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 55efe2aafb84..ff83ef426df5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2428,6 +2428,36 @@ backup_tests() chk_add_nr 1 1 chk_prio_nr 1 1 fi + + if reset "mpc backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi + + if reset "mpc switch to backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc switch to backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi } add_addr_ports_tests() -- cgit v1.2.3-59-g8ed1b From 94bf6aad5dbed1c93618035ec31b37927538c276 Mon Sep 17 00:00:00 2001 From: Linkui Xiao Date: Thu, 14 Jul 2022 09:56:47 +0800 Subject: selftests/bpf: Return true/false (not 1/0) from bool functions Return boolean values ("true" or "false") instead of 1 or 0 from bool functions. 
This fixes the following warnings from coccicheck: tools/testing/selftests/bpf/progs/test_xdp_noinline.c:407:9-10: WARNING: return of 0/1 in function 'decap_v4' with return type bool tools/testing/selftests/bpf/progs/test_xdp_noinline.c:389:9-10: WARNING: return of 0/1 in function 'decap_v6' with return type bool tools/testing/selftests/bpf/progs/test_xdp_noinline.c:290:9-10: WARNING: return of 0/1 in function 'encap_v6' with return type bool tools/testing/selftests/bpf/progs/test_xdp_noinline.c:264:9-10: WARNING: return of 0/1 in function 'parse_tcp' with return type bool tools/testing/selftests/bpf/progs/test_xdp_noinline.c:242:9-10: WARNING: return of 0/1 in function 'parse_udp' with return type bool Generated by: scripts/coccinelle/misc/boolreturn.cocci Suggested-by: Stanislav Fomichev Signed-off-by: Linkui Xiao Signed-off-by: Andrii Nakryiko Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20220714015647.25074-1-xiaolinkui@kylinos.cn --- .../selftests/bpf/progs/test_xdp_noinline.c | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 125d872d7981..ba48fcb98ab2 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -239,7 +239,7 @@ bool parse_udp(void *data, void *data_end, udp = data + off; if (udp + 1 > data_end) - return 0; + return false; if (!is_icmp) { pckt->flow.port16[0] = udp->source; pckt->flow.port16[1] = udp->dest; @@ -247,7 +247,7 @@ bool parse_udp(void *data, void *data_end, pckt->flow.port16[0] = udp->dest; pckt->flow.port16[1] = udp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -261,7 +261,7 @@ bool parse_tcp(void *data, void *data_end, tcp = data + off; if (tcp + 1 > data_end) - return 0; + return false; if (tcp->syn) pckt->flags |= (1 << 1); if 
(!is_icmp) { @@ -271,7 +271,7 @@ bool parse_tcp(void *data, void *data_end, pckt->flow.port16[0] = tcp->dest; pckt->flow.port16[1] = tcp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -287,7 +287,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, void *data; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -295,7 +295,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct ipv6hdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || ip6h + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 56710; @@ -314,7 +314,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, ip6h->saddr.in6_u.u6_addr32[2] = 3; ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); - return 1; + return true; } static __attribute__ ((noinline)) @@ -335,7 +335,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, ip_suffix <<= 15; ip_suffix ^= pckt->flow.src; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -343,7 +343,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct iphdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || iph + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 8; @@ -367,8 +367,8 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; - return 1; + return false; + return true; 
} static __attribute__ ((noinline)) @@ -386,10 +386,10 @@ bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) else new_eth->eth_proto = 56710; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) @@ -404,10 +404,10 @@ bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); new_eth->eth_proto = 8; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) -- cgit v1.2.3-59-g8ed1b From 7fb27a56b9ebd8a77d9dd188e8a42bff99bc3443 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Jul 2022 10:23:16 +0200 Subject: selftests/bpf: Do not attach kprobe_multi bench to bpf_dispatcher_xdp_func Alexei reported crash by running test_progs -j on system with 32 cpus. It turned out the kprobe_multi bench test that attaches all ftrace-able functions will race with bpf_dispatcher_update, that calls bpf_arch_text_poke on bpf_dispatcher_xdp_func, which is ftrace-able function. Ftrace is not aware of this update so this will cause ftrace_bug with: WARNING: CPU: 6 PID: 1985 at arch/x86/kernel/ftrace.c:94 ftrace_verify_code+0x27/0x50 ... ftrace_replace_code+0xa3/0x170 ftrace_modify_all_code+0xbd/0x150 ftrace_startup_enable+0x3f/0x50 ftrace_startup+0x98/0xf0 register_ftrace_function+0x20/0x60 register_fprobe_ips+0xbb/0xd0 bpf_kprobe_multi_link_attach+0x179/0x430 __sys_bpf+0x18a1/0x2440 ... 
------------[ ftrace bug ]------------ ftrace failed to modify [] bpf_dispatcher_xdp_func+0x0/0x10 actual: ffffffe9:7b:ffffff9c:77:1e Setting ftrace call site to call ftrace function It looks like we need some way to hide some functions from ftrace, but meanwhile we workaround this by skipping bpf_dispatcher_xdp_func from kprobe_multi bench test. Reported-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220714082316.479181-1-jolsa@kernel.org --- tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 335917df0614..d457a55ff408 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -364,6 +364,8 @@ static int get_syms(char ***symsp, size_t *cntp) continue; if (!strncmp(name, "rcu_", 4)) continue; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + continue; if (!strncmp(name, "__ftrace_invalid_address__", sizeof("__ftrace_invalid_address__") - 1)) continue; -- cgit v1.2.3-59-g8ed1b From 0ea7b0a454ca1839acddc37c4cf802f0e0d5fb5f Mon Sep 17 00:00:00 2001 From: Jaehee Park Date: Wed, 13 Jul 2022 16:40:49 -0700 Subject: selftests: net: arp_ndisc_untracked_subnets: test for arp_accept and accept_untracked_na ipv4 arp_accept has a new option '2' to create new neighbor entries only if the src ip is in the same subnet as an address configured on the interface that received the garp message. This selftest tests all options in arp_accept. ipv6 has a sysctl endpoint, accept_untracked_na, that defines the behavior for accepting untracked neighbor advertisements. A new option similar to that of arp_accept for learning only from the same subnet is added to accept_untracked_na. 
This selftest tests this new feature. Signed-off-by: Jaehee Park Suggested-by: Roopa Prabhu Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/arp_ndisc_untracked_subnets.sh | 308 +++++++++++++++++++++ 2 files changed, 309 insertions(+) create mode 100755 tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index db05b3764b77..80628bf8413a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -38,6 +38,7 @@ TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh +TEST_PROGS += arp_ndisc_untracked_subnets.sh TEST_PROGS += stress_reuseport_listen.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh new file mode 100755 index 000000000000..c899b446acb6 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -0,0 +1,308 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# 2 namespaces: one host and one router. Use arping from the host to send a +# garp to the router. Router accepts or ignores based on its arp_accept +# or accept_untracked_na configuration. 
+ +TESTS="arp ndisc" + +ROUTER_NS="ns-router" +ROUTER_NS_V6="ns-router-v6" +ROUTER_INTF="veth-router" +ROUTER_ADDR="10.0.10.1" +ROUTER_ADDR_V6="2001:db8:abcd:0012::1" + +HOST_NS="ns-host" +HOST_NS_V6="ns-host-v6" +HOST_INTF="veth-host" +HOST_ADDR="10.0.10.2" +HOST_ADDR_V6="2001:db8:abcd:0012::2" + +SUBNET_WIDTH=24 +PREFIX_WIDTH_V6=64 + +cleanup() { + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +cleanup_v6() { + ip netns del ${HOST_NS_V6} + ip netns del ${ROUTER_NS_V6} +} + +setup() { + set -e + local arp_accept=$1 + + # Set up two namespaces + ip netns add ${ROUTER_NS} + ip netns add ${HOST_NS} + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. + # first, set up the interface's link to the namespace + # then, set the interface "up" + ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up + ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS} + + ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up + ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \ + dev ${ROUTER_INTF} + + ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \ + dev ${HOST_INTF} + ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \ + dev ${HOST_INTF} + ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \ + dev ${ROUTER_INTF} + + ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF} + ip netns exec ${ROUTER_NS} sysctl -w \ + ${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1 + set +e +} + +setup_v6() { + set -e + local accept_untracked_na=$1 + + # Set up two namespaces + ip netns add ${ROUTER_NS_V6} + ip netns add ${HOST_NS_V6} + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host.
+ # first, set up the interface's link to the namespace + # then, set the interface "up" + ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up + ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \ + ${HOST_NS_V6} + + ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up + ip -6 -netns ${ROUTER_NS_V6} addr add \ + ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad + + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \ + dev ${HOST_INTF} + + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \ + >/dev/null 2>&1 + set +e +} + +verify_arp() { + local arp_accept=$1 + local same_subnet=$2 + + neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \ + ${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null) + + if [ ${arp_accept} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + elif [ ${arp_accept} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + [[ -z "${neigh_show_output}" ]] + fi + } + +arp_test_gratuitous() { + set -e + local arp_accept=$1 + local same_subnet=$2 + + if [ ${arp_accept} -eq 2 ]; then + test_msg=("test_arp: " + "accept_arp=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + HOST_ADDR=10.0.11.3 + else + HOST_ADDR=10.0.10.3 + fi + else + test_msg=("test_arp: " + "accept_arp=$1") + fi + # Supply arp_accept option
to set up which sets it in sysctl + setup ${arp_accept} + ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null + + if verify_arp $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + cleanup + set +e +} + +arp_test_gratuitous_combinations() { + arp_test_gratuitous 0 + arp_test_gratuitous 1 + arp_test_gratuitous 2 0 # Second entry indicates subnet or not + arp_test_gratuitous 2 1 +} + +cleanup_tcpdump() { + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ip netns exec ${ROUTER_NS_V6} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +verify_ndisc() { + local accept_untracked_na=$1 + local same_subnet=$2 + + neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \ + to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale) + + if [ ${accept_untracked_na} -eq 1 ]; then + # Neighbour entry expected to be present + [[ ${neigh_show_output} ]] + elif [ ${accept_untracked_na} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + # Neighbour entry expected to be absent for all other cases + [[ -z "${neigh_show_output}" ]] + fi +} + +ndisc_test_untracked_advertisements() { + set -e + test_msg=("test_ndisc: " + "accept_untracked_na=$1") + + local accept_untracked_na=$1 + local same_subnet=$2 + if [ ${accept_untracked_na} -eq 2 ]; then + test_msg=("test_ndisc: " + "accept_untracked_na=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + # Not same subnet + HOST_ADDR_V6=2000:db8:abcd:0013::4 + else + HOST_ADDR_V6=2001:db8:abcd:0012::3 + fi + fi + setup_v6 $1 $2 + start_tcpdump + + 
if verify_ndisc $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + + cleanup_tcpdump + cleanup_v6 + set +e +} + +ndisc_test_untracked_combinations() { + ndisc_test_untracked_advertisements 0 + ndisc_test_untracked_advertisements 1 + ndisc_test_untracked_advertisements 2 0 + ndisc_test_untracked_advertisements 2 1 +} + +################################################################################ +# usage + +usage() +{ + cat < Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v arping)" ]; then + echo "SKIP: Could not run test without arping tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null +cleanup_v6 &> /dev/null + +for t in $TESTS +do + case $t in + arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;; + ndisc_test_untracked_combinations|ndisc) \ + ndisc_test_untracked_combinations;; + help) echo "Test names: $TESTS"; exit 0;; +esac +done -- cgit v1.2.3-59-g8ed1b From ce6dc74a0a4a22047fdaf893047483b303b64898 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 00:07:52 -0700 Subject: selftests/bpf: add test of __weak unknown virtual __kconfig extern Exercise libbpf's logic for unknown __weak virtual __kconfig externs. USDT selftests are already exercising non-weak known virtual extern (LINUX_HAS_BPF_COOKIE), so no need to add explicit tests for it.
Tested-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220714070755.3235561-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/core_extern.c | 17 +++++++---------- tools/testing/selftests/bpf/progs/test_core_extern.c | 3 +++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c index 1931a158510e..63a51e9f3630 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_extern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -39,6 +39,7 @@ static struct test_case { "CONFIG_STR=\"abracad\"\n" "CONFIG_MISSING=0", .data = { + .unkn_virt_val = 0, .bpf_syscall = false, .tristate_val = TRI_MODULE, .bool_val = true, @@ -121,7 +122,7 @@ static struct test_case { void test_core_extern(void) { const uint32_t kern_ver = get_kernel_version(); - int err, duration = 0, i, j; + int err, i, j; struct test_core_extern *skel = NULL; uint64_t *got, *exp; int n = sizeof(*skel->data) / sizeof(uint64_t); @@ -136,19 +137,17 @@ void test_core_extern(void) continue; skel = test_core_extern__open_opts(&opts); - if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) goto cleanup; err = test_core_extern__load(skel); if (t->fails) { - CHECK(!err, "skel_load", - "shouldn't succeed open/load of skeleton\n"); + ASSERT_ERR(err, "skel_load_should_fail"); goto cleanup; - } else if (CHECK(err, "skel_load", - "failed to open/load skeleton\n")) { + } else if (!ASSERT_OK(err, "skel_load")) { goto cleanup; } err = test_core_extern__attach(skel); - if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) + if (!ASSERT_OK(err, "attach_raw_tp")) goto cleanup; usleep(1); @@ -158,9 +157,7 @@ void test_core_extern(void) got = (uint64_t *)skel->data; exp = (uint64_t *)&t->data; for (j = 0; j < n; j++) { - CHECK(got[j] != exp[j], 
"check_res", - "result #%d: expected %llx, but got %llx\n", - j, (__u64)exp[j], (__u64)got[j]); + ASSERT_EQ(got[j], exp[j], "result"); } cleanup: test_core_extern__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c index 3ac3603ad53d..a3c7c1042f35 100644 --- a/tools/testing/selftests/bpf/progs/test_core_extern.c +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -11,6 +11,7 @@ static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; extern int LINUX_KERNEL_VERSION __kconfig; +extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak; extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; extern bool CONFIG_BOOL __kconfig __weak; @@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak; extern uint64_t CONFIG_MISSING __kconfig __weak; uint64_t kern_ver = -1; +uint64_t unkn_virt_val = -1; uint64_t bpf_syscall = -1; uint64_t tristate_val = -1; uint64_t bool_val = -1; @@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx) int i; kern_ver = LINUX_KERNEL_VERSION; + unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN; bpf_syscall = CONFIG_BPF_SYSCALL; tristate_val = CONFIG_TRISTATE; bool_val = CONFIG_BOOL; -- cgit v1.2.3-59-g8ed1b From d814ed62d3d24eb5c5f904b897e0414c1ccb5740 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 00:07:55 -0700 Subject: selftests/bpf: use BPF_KSYSCALL and SEC("ksyscall") in selftests Convert few selftest that used plain SEC("kprobe") with arch-specific syscall wrapper prefix to ksyscall/kretsyscall and corresponding BPF_KSYSCALL macro. test_probe_user.c is especially benefiting from this simplification. 
Tested-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220714070755.3235561-6-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/bpf_syscall_macro.c | 6 ++--- .../selftests/bpf/progs/test_attach_probe.c | 15 ++++++------ .../testing/selftests/bpf/progs/test_probe_user.c | 27 +++++----------------- 3 files changed, 16 insertions(+), 32 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c index 05838ed9b89c..e1e11897e99b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -64,9 +64,9 @@ int BPF_KPROBE(handle_sys_prctl) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_prctl") -int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) +SEC("ksyscall/prctl") +int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) { pid_t pid = bpf_get_current_pid_tgid() >> 32; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index f1c88ad368ef..a1e45fec8938 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -1,11 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Facebook -#include -#include +#include "vmlinux.h" #include #include -#include +#include #include "bpf_misc.h" int kprobe_res = 0; @@ -31,8 +30,8 @@ int handle_kprobe(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KPROBE(handle_kprobe_auto) +SEC("ksyscall/nanosleep") +int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { kprobe2_res = 11; return 0; @@ -56,11 +55,11 @@ int handle_kretprobe(struct 
pt_regs *ctx) return 0; } -SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KRETPROBE(handle_kretprobe_auto) +SEC("kretsyscall/nanosleep") +int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { kretprobe2_res = 22; - return 0; + return ret; } SEC("uprobe") diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 702578a5e496..8e1495008e4d 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -1,35 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 - -#include -#include - -#include - +#include "vmlinux.h" #include #include +#include #include "bpf_misc.h" static struct sockaddr_in old; -SEC("kprobe/" SYS_PREFIX "sys_connect") -int BPF_KPROBE(handle_sys_connect) +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int addrlen) { -#if SYSCALL_WRAPPER == 1 - struct pt_regs *real_regs; -#endif struct sockaddr_in new; - void *ptr; - -#if SYSCALL_WRAPPER == 0 - ptr = (void *)PT_REGS_PARM2(ctx); -#else - real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx); - bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs)); -#endif - bpf_probe_read_user(&old, sizeof(old), ptr); + bpf_probe_read_user(&old, sizeof(old), uservaddr); __builtin_memset(&new, 0xab, sizeof(new)); - bpf_probe_write_user(ptr, &new, sizeof(new)); + bpf_probe_write_user(uservaddr, &new, sizeof(new)); return 0; } -- cgit v1.2.3-59-g8ed1b From 24316461200502aa5feddaa72dcbb8059503a528 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 14 Jul 2022 22:31:46 -0700 Subject: selftests/bpf: validate .bss section bigger than 8MB is possible now Add a simple big 16MB array and validate access to the very last byte of it to make sure that kernel supports > KMALLOC_MAX_SIZE value_size for BPF array maps (which are backing .bss in this case). 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220715053146.1291891-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/skeleton.c | 2 ++ tools/testing/selftests/bpf/progs/test_skeleton.c | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 180afd632f4c..99dac5292b41 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -122,6 +122,8 @@ void test_skeleton(void) ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1b1187d2967b..1a4e93f6d9df 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; int read_mostly_var __read_mostly; int out_mostly_var; +char huge_arr[16 * 1024 * 1024]; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -71,6 +73,8 @@ int handler(const void *ctx) out_mostly_var = read_mostly_var; + huge_arr[sizeof(huge_arr) - 1] = 123; + return 0; } -- cgit v1.2.3-59-g8ed1b From e134601961fef4516df9413b270fb96ef6d034bc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 15 Jul 2022 16:09:52 -0700 Subject: selftests/bpf: test eager BPF ringbuf size adjustment logic Add test validating that libbpf adjusts (and reflects adjusted) ringbuf size early, before bpf_object is loaded. Also make sure we can't successfully resize ringbuf map after bpf_object is loaded. 
Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20220715230952.2219271-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index eb5f7f5aa81a..1455911d9fcb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -50,6 +50,13 @@ void test_ringbuf_multi(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + /* validate ringbuf size adjustment logic */ + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final"); + proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; @@ -65,6 +72,10 @@ void test_ringbuf_multi(void) close(proto_fd); proto_fd = -1; + /* make sure we can't resize ringbuf after object load */ + if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load")) + goto cleanup; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); -- cgit v1.2.3-59-g8ed1b From c5d22f4cfe8dfb93f1db0a1e7e2e7ebc41395d98 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Jul 2022 12:50:32 +0300 Subject: selftests/bpf: fix a test for snprintf() overflow The snprintf() function returns the number of bytes which *would* have been copied if there were 
space. In other words, it can be > sizeof(pin_path). Fixes: c0fa1b6c3efc ("bpf: btf: Add BTF tests") Signed-off-by: Dan Carpenter Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/YtZ+aD/tZMkgOUw+@kili Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 941b0100bafa..ef6528b8084c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5338,7 +5338,7 @@ static void do_test_pprint(int test_num) ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", "/sys/fs/bpf", test->map_name); - if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long", "/sys/fs/bpf", test->map_name)) { err = -1; goto done; -- cgit v1.2.3-59-g8ed1b From f12b86c0d60689aa3973bab1fcea0ead9d77e23b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 19 Jul 2022 17:57:50 -0700 Subject: selftests: net: af_unix: Fix a build error of unix_connect.c. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a build error reported in the link. [0] unix_connect.c: In function ‘unix_connect_test’: unix_connect.c:115:55: error: expected identifier before ‘(’ token #define offsetof(type, member) ((size_t)&((type *)0)->(member)) ^ unix_connect.c:128:12: note: in expansion of macro ‘offsetof’ addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len; ^~~~~~~~ We can fix this by removing () around member, but checkpatch will complain about it, and the root cause of the build failure is that I followed the warning and fixed this in the v2 -> v3 change of the blamed commit. 
[1] CHECK: Macro argument 'member' may be better as '(member)' to avoid precedence issues #33: FILE: tools/testing/selftests/net/af_unix/unix_connect.c:115: +#define offsetof(type, member) ((size_t)&((type *)0)->member) To avoid this warning, let's use offsetof() defined in stddef.h instead. [0]: https://lore.kernel.org/linux-mm/202207182205.FrkMeDZT-lkp@intel.com/ [1]: https://lore.kernel.org/netdev/20220702154818.66761-1-kuniyu@amazon.com/ Fixes: e95ab1d85289 ("selftests: net: af_unix: Test connect() with different netns.") Reported-by: kernel test robot Suggested-by: Jakub Kicinski Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20220720005750.16600-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/unix_connect.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c index 157e44ef7f37..d799fd8f5c7c 100644 --- a/tools/testing/selftests/net/af_unix/unix_connect.c +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include +#include #include #include @@ -112,8 +113,6 @@ FIXTURE_TEARDOWN(unix_connect) remove("test"); } -#define offsetof(type, member) ((size_t)&((type *)0)->(member)) - TEST_F(unix_connect, test) { socklen_t addrlen; -- cgit v1.2.3-59-g8ed1b From 842463f253abde9a0de19a4b9e83a6c28ac9364b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 Jul 2022 13:37:01 -0700 Subject: selftests: tls: add a test for timeo vs lock Add a test for recv timeout. Place it in the tls_err group, so it only runs for TLS 1.2 and 1.3 but not for every AEAD out there. 
Link: https://lore.kernel.org/r/20220720203701.2179034-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index dc26aae0feb0..4ecbac197c46 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1597,6 +1597,38 @@ TEST_F(tls_err, bad_cmsg) EXPECT_EQ(errno, EBADMSG); } +TEST_F(tls_err, timeo) +{ + struct timeval tv = { .tv_usec = 10000, }; + char buf[128]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + wait(&ret); + } else { + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + exit(0); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; -- cgit v1.2.3-59-g8ed1b From a4703e3184320d6e15e2bc81d2ccf1c8c883f9d1 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:35 +0200 Subject: bpf: Switch to new kfunc flags infrastructure Instead of populating multiple sets to indicate some attribute and then researching the same BTF ID in them, prepare a single unified BTF set which indicates whether a kfunc is allowed to be called, and also its attributes if any at the same time. Now, only one call is needed to perform the lookup for both kfunc availability and its attributes. 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-4-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +- include/linux/btf.h | 33 ++----- kernel/bpf/btf.c | 106 ++++++++++----------- kernel/bpf/verifier.c | 14 ++- net/bpf/test_run.c | 70 +++++--------- net/ipv4/bpf_tcp_ca.c | 18 ++-- net/ipv4/tcp_bbr.c | 24 ++--- net/ipv4/tcp_cubic.c | 20 ++-- net/ipv4/tcp_dctcp.c | 20 ++-- net/netfilter/nf_conntrack_bpf.c | 49 +++------- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 10 +- 11 files changed, 145 insertions(+), 222 deletions(-) (limited to 'tools/testing') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 11950029284f..a97751d845c9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1924,7 +1924,8 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *regs); int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs); + struct bpf_reg_state *regs, + u32 kfunc_flags); int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, struct bpf_reg_state *reg); int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog, diff --git a/include/linux/btf.h b/include/linux/btf.h index 1bfed7fa0428..6dfc6eaf7f8c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -12,14 +12,11 @@ #define BTF_TYPE_EMIT(type) ((void)(type *)0) #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) -enum btf_kfunc_type { - BTF_KFUNC_TYPE_CHECK, - BTF_KFUNC_TYPE_ACQUIRE, - BTF_KFUNC_TYPE_RELEASE, - BTF_KFUNC_TYPE_RET_NULL, - BTF_KFUNC_TYPE_KPTR_ACQUIRE, - BTF_KFUNC_TYPE_MAX, -}; +/* These need to be macros, as the expressions are used in assembler input */ +#define KF_ACQUIRE (1 << 0) /* kfunc is an acquire function */ +#define KF_RELEASE (1 << 1) /* kfunc is a release function */ +#define KF_RET_NULL (1 << 2) /* kfunc returns a pointer that may be NULL */ 
+#define KF_KPTR_GET (1 << 3) /* kfunc returns reference to a kptr */ struct btf; struct btf_member; @@ -30,16 +27,7 @@ struct btf_id_set; struct btf_kfunc_id_set { struct module *owner; - union { - struct { - struct btf_id_set *check_set; - struct btf_id_set *acquire_set; - struct btf_id_set *release_set; - struct btf_id_set *ret_null_set; - struct btf_id_set *kptr_acquire_set; - }; - struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; - }; + struct btf_id_set8 *set; }; struct btf_id_dtor_kfunc { @@ -378,9 +366,9 @@ const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id); + u32 kfunc_btf_id); int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s); s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); @@ -397,12 +385,11 @@ static inline const char *btf_name_by_offset(const struct btf *btf, { return NULL; } -static inline bool btf_kfunc_id_set_contains(const struct btf *btf, +static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - return false; + return NULL; } static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, const struct btf_kfunc_id_set *s) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 5869f03bcb6e..4d9c2d88720f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -213,7 +213,7 @@ enum { }; struct btf_kfunc_set_tab { - struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; + struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; }; struct btf_id_dtor_kfunc_tab { @@ -1616,7 +1616,7 @@ static void btf_free_id(struct btf *btf) static void 
btf_free_kfunc_set_tab(struct btf *btf) { struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; - int hook, type; + int hook; if (!tab) return; @@ -1625,10 +1625,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf) */ if (btf_is_module(btf)) goto free_tab; - for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { - for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) - kfree(tab->sets[hook][type]); - } + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) + kfree(tab->sets[hook]); free_tab: kfree(tab); btf->kfunc_set_tab = NULL; @@ -6172,7 +6170,8 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf, static int btf_check_func_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, struct bpf_reg_state *regs, - bool ptr_to_mem_ok) + bool ptr_to_mem_ok, + u32 kfunc_flags) { enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_verifier_log *log = &env->log; @@ -6210,10 +6209,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, if (is_kfunc) { /* Only kfunc can be release func */ - rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_RELEASE, func_id); - kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id); + rel = kfunc_flags & KF_RELEASE; + kptr_get = kfunc_flags & KF_KPTR_GET; } /* check that BTF function arguments match actual types that the @@ -6442,7 +6439,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, return -EINVAL; is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; - err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global); + err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, 0); /* Compiler optimizations can remove arguments from static functions * or mismatched type can be passed into a global function. 
@@ -6455,9 +6452,10 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog, int btf_check_kfunc_arg_match(struct bpf_verifier_env *env, const struct btf *btf, u32 func_id, - struct bpf_reg_state *regs) + struct bpf_reg_state *regs, + u32 kfunc_flags) { - return btf_check_func_arg_match(env, btf, func_id, regs, true); + return btf_check_func_arg_match(env, btf, func_id, regs, true, kfunc_flags); } /* Convert BTF of a function into bpf_reg_state if possible @@ -6854,6 +6852,11 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id) return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; } +static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id) +{ + return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); +} + enum { BTF_MODULE_F_LIVE = (1 << 0), }; @@ -7102,16 +7105,16 @@ BTF_TRACING_TYPE_xxx /* Kernel Function (kfunc) BTF ID set registration API */ -static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - enum btf_kfunc_type type, - struct btf_id_set *add_set, bool vmlinux_set) +static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, + struct btf_id_set8 *add_set) { + bool vmlinux_set = !btf_is_module(btf); struct btf_kfunc_set_tab *tab; - struct btf_id_set *set; + struct btf_id_set8 *set; u32 set_cnt; int ret; - if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { + if (hook >= BTF_KFUNC_HOOK_MAX) { ret = -EINVAL; goto end; } @@ -7127,7 +7130,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, btf->kfunc_set_tab = tab; } - set = tab->sets[hook][type]; + set = tab->sets[hook]; /* Warn when register_btf_kfunc_id_set is called twice for the same hook * for module sets. */ @@ -7141,7 +7144,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, * pointer and return. 
*/ if (!vmlinux_set) { - tab->sets[hook][type] = add_set; + tab->sets[hook] = add_set; return 0; } @@ -7150,7 +7153,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary - * searching the set using btf_id_set_contains function work. + * searching the set using btf_id_set8_contains function work. */ set_cnt = set ? set->cnt : 0; @@ -7165,8 +7168,8 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, } /* Grow set */ - set = krealloc(tab->sets[hook][type], - offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), + set = krealloc(tab->sets[hook], + offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]), GFP_KERNEL | __GFP_NOWARN); if (!set) { ret = -ENOMEM; @@ -7174,15 +7177,15 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, } /* For newly allocated set, initialize set->cnt to 0 */ - if (!tab->sets[hook][type]) + if (!tab->sets[hook]) set->cnt = 0; - tab->sets[hook][type] = set; + tab->sets[hook] = set; /* Concatenate the two sets */ - memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); + memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); set->cnt += add_set->cnt; - sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); + sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); return 0; end: @@ -7190,38 +7193,25 @@ end: return ret; } -static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, - const struct btf_kfunc_id_set *kset) -{ - bool vmlinux_set = !btf_is_module(btf); - int type, ret = 0; - - for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { - if (!kset->sets[type]) - continue; - - ret = __btf_populate_kfunc_set(btf, hook, type, 
kset->sets[type], vmlinux_set); - if (ret) - break; - } - return ret; -} - -static bool __btf_kfunc_id_set_contains(const struct btf *btf, +static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, enum btf_kfunc_hook hook, - enum btf_kfunc_type type, u32 kfunc_btf_id) { - struct btf_id_set *set; + struct btf_id_set8 *set; + u32 *id; - if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) - return false; + if (hook >= BTF_KFUNC_HOOK_MAX) + return NULL; if (!btf->kfunc_set_tab) - return false; - set = btf->kfunc_set_tab->sets[hook][type]; + return NULL; + set = btf->kfunc_set_tab->sets[hook]; if (!set) - return false; - return btf_id_set_contains(set, kfunc_btf_id); + return NULL; + id = btf_id_set8_contains(set, kfunc_btf_id); + if (!id) + return NULL; + /* The flags for BTF ID are located next to it */ + return id + 1; } static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) @@ -7249,14 +7239,14 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) * keeping the reference for the duration of the call provides the necessary * protection for looking up a well-formed btf->kfunc_set_tab. 
*/ -bool btf_kfunc_id_set_contains(const struct btf *btf, +u32 *btf_kfunc_id_set_contains(const struct btf *btf, enum bpf_prog_type prog_type, - enum btf_kfunc_type type, u32 kfunc_btf_id) + u32 kfunc_btf_id) { enum btf_kfunc_hook hook; hook = bpf_prog_type_to_kfunc_hook(prog_type); - return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); } /* This function must be invoked only from initcalls/module init functions */ @@ -7283,7 +7273,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, return PTR_ERR(btf); hook = bpf_prog_type_to_kfunc_hook(prog_type); - ret = btf_populate_kfunc_set(btf, hook, kset); + ret = btf_populate_kfunc_set(btf, hook, kset->set); btf_put(btf); return ret; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7c1e056624f9..096fdac70165 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7562,6 +7562,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int err, insn_idx = *insn_idx_p; const struct btf_param *args; struct btf *desc_btf; + u32 *kfunc_flags; bool acq; /* skip for now, but return error when we find this in fixup_kfunc_call */ @@ -7577,18 +7578,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, func_name = btf_name_by_offset(desc_btf, func->name_off); func_proto = btf_type_by_id(desc_btf, func->type); - if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_CHECK, func_id)) { + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); + if (!kfunc_flags) { verbose(env, "calling kernel function %s is not allowed\n", func_name); return -EACCES; } - - acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_ACQUIRE, func_id); + acq = *kfunc_flags & KF_ACQUIRE; /* Check the arguments */ - err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); + err = 
btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags); if (err < 0) return err; /* In case of release function, we get register number of refcounted @@ -7632,8 +7631,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, regs[BPF_REG_0].btf = desc_btf; regs[BPF_REG_0].type = PTR_TO_BTF_ID; regs[BPF_REG_0].btf_id = ptr_type_id; - if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), - BTF_KFUNC_TYPE_RET_NULL, func_id)) { + if (*kfunc_flags & KF_RET_NULL) { regs[BPF_REG_0].type |= PTR_MAYBE_NULL; /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ regs[BPF_REG_0].id = ++env->id_gen; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dc9dc0bedca0..ca5b7234a350 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -695,48 +695,26 @@ __diag_pop(); ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); -BTF_SET_START(test_sk_check_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test1) -BTF_ID(func, bpf_kfunc_call_test2) -BTF_ID(func, bpf_kfunc_call_test3) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_release) -BTF_ID(func, bpf_kfunc_call_memb_release) -BTF_ID(func, bpf_kfunc_call_memb1_release) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_ID(func, bpf_kfunc_call_test_pass_ctx) -BTF_ID(func, bpf_kfunc_call_test_pass1) -BTF_ID(func, bpf_kfunc_call_test_pass2) -BTF_ID(func, bpf_kfunc_call_test_fail1) -BTF_ID(func, bpf_kfunc_call_test_fail2) -BTF_ID(func, bpf_kfunc_call_test_fail3) -BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) -BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) -BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) -BTF_SET_END(test_sk_check_kfunc_ids) - -BTF_SET_START(test_sk_acquire_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_acquire_kfunc_ids) - -BTF_SET_START(test_sk_release_kfunc_ids) -BTF_ID(func, 
bpf_kfunc_call_test_release) -BTF_ID(func, bpf_kfunc_call_memb_release) -BTF_ID(func, bpf_kfunc_call_memb1_release) -BTF_SET_END(test_sk_release_kfunc_ids) - -BTF_SET_START(test_sk_ret_null_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_acquire) -BTF_ID(func, bpf_kfunc_call_memb_acquire) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_ret_null_kfunc_ids) - -BTF_SET_START(test_sk_kptr_acquire_kfunc_ids) -BTF_ID(func, bpf_kfunc_call_test_kptr_get) -BTF_SET_END(test_sk_kptr_acquire_kfunc_ids) +BTF_SET8_START(test_sk_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_SET8_END(test_sk_check_kfunc_ids) static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, u32 size, u32 headroom, u32 tailroom) @@ -1620,12 +1598,8 @@ out: } static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &test_sk_check_kfunc_ids, - .acquire_set = &test_sk_acquire_kfunc_ids, - .release_set = &test_sk_release_kfunc_ids, - .ret_null_set = &test_sk_ret_null_kfunc_ids, - 
.kptr_acquire_set = &test_sk_kptr_acquire_kfunc_ids + .owner = THIS_MODULE, + .set = &test_sk_check_kfunc_ids, }; BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 7a181631b995..85a9e500c42d 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -197,17 +197,17 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, } } -BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) -BTF_ID(func, tcp_reno_ssthresh) -BTF_ID(func, tcp_reno_cong_avoid) -BTF_ID(func, tcp_reno_undo_cwnd) -BTF_ID(func, tcp_slow_start) -BTF_ID(func, tcp_cong_avoid_ai) -BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) +BTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) +BTF_ID_FLAGS(func, tcp_reno_ssthresh) +BTF_ID_FLAGS(func, tcp_reno_cong_avoid) +BTF_ID_FLAGS(func, tcp_reno_undo_cwnd) +BTF_ID_FLAGS(func, tcp_slow_start) +BTF_ID_FLAGS(func, tcp_cong_avoid_ai) +BTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_tcp_ca_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_tcp_ca_check_kfunc_ids, }; static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 075e744bfb48..54eec33c6e1c 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1154,24 +1154,24 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .set_state = bbr_set_state, }; -BTF_SET_START(tcp_bbr_check_kfunc_ids) +BTF_SET8_START(tcp_bbr_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, bbr_init) -BTF_ID(func, bbr_main) -BTF_ID(func, bbr_sndbuf_expand) -BTF_ID(func, bbr_undo_cwnd) -BTF_ID(func, bbr_cwnd_event) -BTF_ID(func, bbr_ssthresh) -BTF_ID(func, bbr_min_tso_segs) -BTF_ID(func, bbr_set_state) +BTF_ID_FLAGS(func, bbr_init) +BTF_ID_FLAGS(func, bbr_main) +BTF_ID_FLAGS(func, bbr_sndbuf_expand) +BTF_ID_FLAGS(func, bbr_undo_cwnd) +BTF_ID_FLAGS(func, bbr_cwnd_event) +BTF_ID_FLAGS(func, 
bbr_ssthresh) +BTF_ID_FLAGS(func, bbr_min_tso_segs) +BTF_ID_FLAGS(func, bbr_set_state) #endif #endif -BTF_SET_END(tcp_bbr_check_kfunc_ids) +BTF_SET8_END(tcp_bbr_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_bbr_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_bbr_check_kfunc_ids, }; static int __init bbr_register(void) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 68178e7280ce..768c10c1f649 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -485,22 +485,22 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .name = "cubic", }; -BTF_SET_START(tcp_cubic_check_kfunc_ids) +BTF_SET8_START(tcp_cubic_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, cubictcp_init) -BTF_ID(func, cubictcp_recalc_ssthresh) -BTF_ID(func, cubictcp_cong_avoid) -BTF_ID(func, cubictcp_state) -BTF_ID(func, cubictcp_cwnd_event) -BTF_ID(func, cubictcp_acked) +BTF_ID_FLAGS(func, cubictcp_init) +BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) +BTF_ID_FLAGS(func, cubictcp_cong_avoid) +BTF_ID_FLAGS(func, cubictcp_state) +BTF_ID_FLAGS(func, cubictcp_cwnd_event) +BTF_ID_FLAGS(func, cubictcp_acked) #endif #endif -BTF_SET_END(tcp_cubic_check_kfunc_ids) +BTF_SET8_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_cubic_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_cubic_check_kfunc_ids, }; static int __init cubictcp_register(void) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index ab034a4e9324..2a6c0dd665a4 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -239,22 +239,22 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = { .name = "dctcp-reno", }; -BTF_SET_START(tcp_dctcp_check_kfunc_ids) +BTF_SET8_START(tcp_dctcp_check_kfunc_ids) #ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE -BTF_ID(func, dctcp_init) -BTF_ID(func, 
dctcp_update_alpha) -BTF_ID(func, dctcp_cwnd_event) -BTF_ID(func, dctcp_ssthresh) -BTF_ID(func, dctcp_cwnd_undo) -BTF_ID(func, dctcp_state) +BTF_ID_FLAGS(func, dctcp_init) +BTF_ID_FLAGS(func, dctcp_update_alpha) +BTF_ID_FLAGS(func, dctcp_cwnd_event) +BTF_ID_FLAGS(func, dctcp_ssthresh) +BTF_ID_FLAGS(func, dctcp_cwnd_undo) +BTF_ID_FLAGS(func, dctcp_state) #endif #endif -BTF_SET_END(tcp_dctcp_check_kfunc_ids) +BTF_SET8_END(tcp_dctcp_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &tcp_dctcp_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &tcp_dctcp_check_kfunc_ids, }; static int __init dctcp_register(void) diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index bc4d5cd63a94..cf2096f65d0e 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -219,48 +219,21 @@ void bpf_ct_release(struct nf_conn *nfct) __diag_pop() -BTF_SET_START(nf_ct_xdp_check_kfunc_ids) -BTF_ID(func, bpf_xdp_ct_lookup) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_xdp_check_kfunc_ids) - -BTF_SET_START(nf_ct_tc_check_kfunc_ids) -BTF_ID(func, bpf_skb_ct_lookup) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_tc_check_kfunc_ids) - -BTF_SET_START(nf_ct_acquire_kfunc_ids) -BTF_ID(func, bpf_xdp_ct_lookup) -BTF_ID(func, bpf_skb_ct_lookup) -BTF_SET_END(nf_ct_acquire_kfunc_ids) - -BTF_SET_START(nf_ct_release_kfunc_ids) -BTF_ID(func, bpf_ct_release) -BTF_SET_END(nf_ct_release_kfunc_ids) - -/* Both sets are identical */ -#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids - -static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &nf_ct_xdp_check_kfunc_ids, - .acquire_set = &nf_ct_acquire_kfunc_ids, - .release_set = &nf_ct_release_kfunc_ids, - .ret_null_set = &nf_ct_ret_null_kfunc_ids, -}; - -static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { - .owner = THIS_MODULE, - .check_set = 
&nf_ct_tc_check_kfunc_ids, - .acquire_set = &nf_ct_acquire_kfunc_ids, - .release_set = &nf_ct_release_kfunc_ids, - .ret_null_set = &nf_ct_ret_null_kfunc_ids, +BTF_SET8_START(nf_ct_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE) +BTF_SET8_END(nf_ct_kfunc_set) + +static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = { + .owner = THIS_MODULE, + .set = &nf_ct_kfunc_set, }; int register_nf_conntrack_bpf(void) { int ret; - ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); - return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set); + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set); } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index e585e1cefc77..792cb15bac40 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -148,13 +148,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_check_kfunc_ids) -BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_check_kfunc_ids) +BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_testmod_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_testmod_check_kfunc_ids, }; extern int bpf_fentry_test1(int a); -- cgit v1.2.3-59-g8ed1b From 8dd5e75683f713c155880c30ea545f2360c9bf46 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:42 +0200 
Subject: selftests/bpf: Add verifier tests for trusted kfunc args Make sure verifier rejects the bad cases and ensure the good case keeps working. The selftests make use of the bpf_kfunc_call_test_ref kfunc added in the previous patch only for verification. Acked-by: Yonghong Song Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-11-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/calls.c | 53 ++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 743ed34c1238..3fb4f69b1962 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -218,6 +218,59 @@ .result = REJECT, .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed", }, +{ + "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_ref", 10 }, + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr = "R1 must be referenced", +}, +{ + "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + 
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_release", 10 }, + }, + .result_unpriv = REJECT, + .result = ACCEPT, +}, { "calls: basic sanity", .insns = { -- cgit v1.2.3-59-g8ed1b From 6eb7fba007a728e8701be11e93e9da40c95ffad3 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 21 Jul 2022 15:42:43 +0200 Subject: selftests/bpf: Add tests for new nf_conntrack kfuncs Introduce selftests for the following kfunc helpers: - bpf_xdp_ct_alloc - bpf_skb_ct_alloc - bpf_ct_insert_entry - bpf_ct_set_timeout - bpf_ct_change_timeout - bpf_ct_set_status - bpf_ct_change_status Signed-off-by: Lorenzo Bianconi Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-12-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 8 +++ tools/testing/selftests/bpf/progs/test_bpf_nf.c | 85 +++++++++++++++++++++---- 2 files changed, 81 insertions(+), 12 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dd30b1e3a67c..cbada73a61f8 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -39,6 +39,14 @@ void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid 
netns_id"); ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); + ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry"); + ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry"); + ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup"); + /* allow some tolerance for test_delta_timeout value to avoid races. */ + ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); + ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); + /* expected status is IPS_SEEN_REPLY */ + ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); end: test_bpf_nf__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index f00a9731930e..196cd8dfe42a 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -8,6 +8,8 @@ #define EINVAL 22 #define ENOENT 2 +extern unsigned long CONFIG_HZ __kconfig; + int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; int test_einval_netns_id = 0; @@ -16,6 +18,11 @@ int test_eproto_l4proto = 0; int test_enonet_netns_id = 0; int test_enoent_lookup = 0; int test_eafnosupport = 0; +int test_alloc_entry = -EINVAL; +int test_insert_entry = -EAFNOSUPPORT; +int test_succ_lookup = -ENOENT; +u32 test_delta_timeout = 0; +u32 test_status = 0; struct nf_conn; @@ -26,31 +33,44 @@ struct bpf_ct_opts___local { u8 reserved[3]; } __attribute__((preserve_access_index)); +struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, 
+ struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; static __always_inline void -nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, - struct bpf_ct_opts___local *, u32), +nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), void *ctx) { struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; struct bpf_sock_tuple bpf_tuple; struct nf_conn *ct; + int err; __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); - ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_einval_bpf_tuple = opts_def.error; opts_def.reserved[0] = 1; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.reserved[0] = 0; opts_def.l4proto = IPPROTO_TCP; if (ct) @@ -59,21 +79,24 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_einval_reserved = opts_def.error; opts_def.netns_id = -2; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_einval_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, 
sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def) - 1); if (ct) bpf_ct_release(ct); else test_einval_len_opts = opts_def.error; opts_def.l4proto = IPPROTO_ICMP; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.l4proto = IPPROTO_TCP; if (ct) bpf_ct_release(ct); @@ -81,37 +104,75 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_eproto_l4proto = opts_def.error; opts_def.netns_id = 0xf00f; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_enonet_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_enoent_lookup = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_eafnosupport = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + bpf_ct_set_status(ct, IPS_CONFIRMED); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + ct_lk = lookup_fn(ctx, &bpf_tuple, 
sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + /* update ct entry timeout */ + bpf_ct_change_timeout(ct_lk, 10000); + test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); + test_delta_timeout /= CONFIG_HZ; + test_status = IPS_SEEN_REPLY; + bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY); + bpf_ct_release(ct_lk); + test_succ_lookup = 0; + } + bpf_ct_release(ct_ins); + test_insert_entry = 0; + } + test_alloc_entry = 0; + } } SEC("xdp") int nf_xdp_ct_test(struct xdp_md *ctx) { - nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); + nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { - nf_ct_test((void *)bpf_skb_ct_lookup, ctx); + nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } -- cgit v1.2.3-59-g8ed1b From c6f420ac9d251f694d030ed4ea43262f3f3ef39e Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:44 +0200 Subject: selftests/bpf: Add negative tests for new nf_conntrack kfuncs Test cases we care about and ensure improper usage is caught and rejected by the verifier. 
Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-13-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 56 ++++++++- .../testing/selftests/bpf/progs/test_bpf_nf_fail.c | 134 +++++++++++++++++++++ 2 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index cbada73a61f8..7a74a1579076 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -2,13 +2,29 @@ #include #include #include "test_bpf_nf.skel.h" +#include "test_bpf_nf_fail.skel.h" + +static char log_buf[1024 * 1024]; + +struct { + const char *prog_name; + const char *err_msg; +} test_bpf_nf_fail_tests[] = { + { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" }, + { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" }, + { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, + { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, +}; enum { TEST_XDP, TEST_TC_BPF, }; -void test_bpf_nf_ct(int mode) +static void test_bpf_nf_ct(int mode) { struct test_bpf_nf *skel; int prog_fd, err; @@ -51,10 +67,48 @@ end: test_bpf_nf__destroy(skel); } +static void 
test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_bpf_nf_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_bpf_nf_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_bpf_nf_fail__load(skel); + if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_bpf_nf_fail__destroy(skel); +} + void test_bpf_nf(void) { + int i; if (test__start_subtest("xdp-ct")) test_bpf_nf_ct(TEST_XDP); if (test__start_subtest("tc-bpf-ct")) test_bpf_nf_ct(TEST_TC_BPF); + for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) { + if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name)) + test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name, + test_bpf_nf_fail_tests[i].err_msg); + } } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c new file mode 100644 index 000000000000..bf79af15c808 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +struct nf_conn; + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +} __attribute__((preserve_access_index)); + +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple 
*, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; +void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; + +SEC("?tc") +int alloc_release(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_release(ct); + return 0; +} + +SEC("?tc") +int insert_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int lookup_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int set_timeout_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int set_status_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + 
if (!ct) + return 0; + bpf_ct_set_status(ct, 0); + return 0; +} + +SEC("?tc") +int change_timeout_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int change_status_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_status(ct, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From e3fa4735f04dcebd49c78544eb6c363efdd6385f Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Thu, 21 Jul 2022 15:42:45 +0200 Subject: selftests/bpf: Fix test_verifier failed test in unprivileged mode Loading the BTF won't be permitted without privileges, hence only test for privileged mode by setting the prog type. This makes the test_verifier show 0 failures when unprivileged BPF is enabled. 
Fixes: 41188e9e9def ("selftest/bpf: Test for use-after-free bug fix in inline_bpf_loop") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220721134245.2450-14-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/verifier/bpf_loop_inline.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c index 2d0023659d88..a535d41dc20d 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -251,6 +251,7 @@ .expected_insns = { PSEUDO_CALL_INSN() }, .unexpected_insns = { HELPER_CALL_INSN() }, .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, .func_info_cnt = 2, BTF_TYPES -- cgit v1.2.3-59-g8ed1b From 16576a034c4ba2e3179f48554d4f1bd5c05382cd Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 20 Jul 2022 11:13:10 -0700 Subject: ping: support ipv6 ping socket flow labels Ping sockets don't appear to make any attempt to preserve flow labels created and set by userspace using IPV6_FLOWINFO_SEND. Instead they are clobbered by autolabels (if enabled) or zero. Grab the flowlabel out of the msghdr similar to how rawv6_sendmsg does it and move the memset up so it doesn't get zeroed after. Signed-off-by: Alan Brady Tested-by: Gurucharan Signed-off-by: Tony Nguyen Signed-off-by: David S. 
Miller --- net/ipv6/ping.c | 6 ++- tools/testing/selftests/net/ipv6_flowlabel.c | 75 +++++++++++++++++++++------ tools/testing/selftests/net/ipv6_flowlabel.sh | 16 ++++++ 3 files changed, 81 insertions(+), 16 deletions(-) (limited to 'tools/testing') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ecf3a553a0dc..b1179f62bd23 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -64,6 +64,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (err) return err; + memset(&fl6, 0, sizeof(fl6)); + if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name); if (msg->msg_namelen < sizeof(*u)) @@ -72,12 +74,15 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); + if (np->sndflow) + fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; + fl6.flowlabel = np->flow_label; } if (!oif) @@ -101,7 +106,6 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.sockc.tsflags = sk->sk_tsflags; ipc6.sockc.mark = sk->sk_mark; - memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; if (msg->msg_controllen) { diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c index a7c41375374f..708a9822259d 100644 --- a/tools/testing/selftests/net/ipv6_flowlabel.c +++ b/tools/testing/selftests/net/ipv6_flowlabel.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -29,26 +30,48 @@ #ifndef IPV6_FLOWLABEL_MGR #define IPV6_FLOWLABEL_MGR 32 #endif +#ifndef IPV6_FLOWINFO_SEND +#define IPV6_FLOWINFO_SEND 33 +#endif #define FLOWLABEL_WILDCARD ((uint32_t) -1) static const char cfg_data[] = "a"; static uint32_t cfg_label = 1; +static bool use_ping; +static bool use_flowinfo_send; + +static struct icmp6hdr 
icmp6 = { + .icmp6_type = ICMPV6_ECHO_REQUEST +}; + +static struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, +}; static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel) { char control[CMSG_SPACE(sizeof(flowlabel))] = {0}; struct msghdr msg = {0}; - struct iovec iov = {0}; + struct iovec iov = { + .iov_base = (char *)cfg_data, + .iov_len = sizeof(cfg_data) + }; int ret; - iov.iov_base = (char *)cfg_data; - iov.iov_len = sizeof(cfg_data); + if (use_ping) { + iov.iov_base = &icmp6; + iov.iov_len = sizeof(icmp6); + } msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (with_flowlabel) { + if (use_flowinfo_send) { + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + } else if (with_flowlabel) { struct cmsghdr *cm; cm = (void *)control; @@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) ret = recvmsg(fd, &msg, 0); if (ret == -1) error(1, errno, "recv"); + if (use_ping) + goto parse_cmsg; if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) error(1, 0, "recv: truncated"); if (ret != sizeof(cfg_data)) @@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) if (memcmp(data, cfg_data, sizeof(data))) error(1, 0, "recv: data mismatch"); +parse_cmsg: cm = CMSG_FIRSTHDR(&msg); if (with_flowlabel) { if (!cm) @@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm)); fprintf(stderr, "recv with label %u\n", flowlabel); - if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) + if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) { fprintf(stderr, "recv: incorrect flowlabel %u != %u\n", flowlabel, expect); + error(1, 0, "recv: flowlabel is wrong"); + } } else { fprintf(stderr, "recv without label\n"); @@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "l:")) != -1) { + while ((c = getopt(argc, argv, "l:ps")) != -1) { switch (c) { 
case 'l': cfg_label = strtoul(optarg, NULL, 0); break; + case 'p': + use_ping = true; + break; + case 's': + use_flowinfo_send = true; + break; default: error(1, 0, "%s: parse error", argv[0]); } @@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv) int main(int argc, char **argv) { - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_port = htons(8000), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; const int one = 1; int fdt, fdr; + int prot = 0; + + addr.sin6_port = htons(8000); parse_opts(argc, argv); - fdt = socket(PF_INET6, SOCK_DGRAM, 0); + if (use_ping) { + fprintf(stderr, "attempting to use ping sockets\n"); + prot = IPPROTO_ICMPV6; + } + + fdt = socket(PF_INET6, SOCK_DGRAM, prot); if (fdt == -1) error(1, errno, "socket t"); - fdr = socket(PF_INET6, SOCK_DGRAM, 0); + fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0); if (fdr == -1) error(1, errno, "socket r"); if (connect(fdt, (void *)&addr, sizeof(addr))) error(1, errno, "connect"); - if (bind(fdr, (void *)&addr, sizeof(addr))) + if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr))) error(1, errno, "bind"); flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE); @@ -216,13 +253,21 @@ int main(int argc, char **argv) do_recv(fdr, false, 0); } + if (use_flowinfo_send) { + fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n"); + addr.sin6_flowinfo = htonl(cfg_label); + if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one, + sizeof(one)) == -1) + error(1, errno, "setsockopt flowinfo_send"); + } + fprintf(stderr, "send label\n"); do_send(fdt, true, cfg_label); do_recv(fdr, true, cfg_label); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) + if (!use_ping && close(fdt)) error(1, errno, "close t"); return 0; diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh index d3bc6442704e..cee95e252bee 100755 --- a/tools/testing/selftests/net/ipv6_flowlabel.sh +++ 
b/tools/testing/selftests/net/ipv6_flowlabel.sh @@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)" ./in_netns.sh \ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1' +echo "TEST datapath (with ping-sockets)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p' + +echo "TEST datapath (with flowinfo-send)" +./in_netns.sh \ + sh -c './ipv6_flowlabel -l 1 -s' + +echo "TEST datapath (with ping-sockets flowinfo-send)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p -s' + echo OK. All tests passed -- cgit v1.2.3-59-g8ed1b From f664f9c6b4a1bb9a10af812df0fbbf6aac28fcc6 Mon Sep 17 00:00:00 2001 From: Jie2x Zhou Date: Tue, 19 Jul 2022 16:24:30 +0800 Subject: bpf/selftests: Fix couldn't retrieve pinned program in xdp veth test Before change: selftests: bpf: test_xdp_veth.sh Couldn't retrieve pinned program '/sys/fs/bpf/test_xdp_veth/progs/redirect_map_0': No such file or directory selftests: xdp_veth [SKIP] ok 20 selftests: bpf: test_xdp_veth.sh # SKIP After change: PING 10.1.1.33 (10.1.1.33) 56(84) bytes of data. 
64 bytes from 10.1.1.33: icmp_seq=1 ttl=64 time=0.320 ms --- 10.1.1.33 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.320/0.320/0.320/0.000 ms selftests: xdp_veth [PASS] For the test case, the following can be found: ls /sys/fs/bpf/test_xdp_veth/progs/redirect_map_0 ls: cannot access '/sys/fs/bpf/test_xdp_veth/progs/redirect_map_0': No such file or directory ls /sys/fs/bpf/test_xdp_veth/progs/ xdp_redirect_map_0 xdp_redirect_map_1 xdp_redirect_map_2 Reported-by: kernel test robot Signed-off-by: Jie2x Zhou Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220719082430.9916-1-jie2x.zhou@intel.com --- tools/testing/selftests/bpf/test_xdp_veth.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index 392d28cc4e58..49936c4c8567 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -106,9 +106,9 @@ bpftool prog loadall \ bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0 -ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 -ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 -ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 +ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0 +ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1 +ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2 ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp -- cgit v1.2.3-59-g8ed1b From 1115169f47ae45eeb04c616c404492bc8268daa0 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 25 Jul 2022 16:32:53 
+0200 Subject: selftests/bpf: Don't assign outer source IP to host The previous commit fixed a bug in the bpf_skb_set_tunnel_key helper to avoid dropping packets whose outer source IP address isn't assigned to a host interface. This commit changes the corresponding selftest to not assign the outer source IP address to an interface. Not assigning the source IP to an interface causes two issues in the existing test: 1. The ARP requests will fail for that IP address so we need to add the ARP entry manually. 2. The encapsulated ICMP echo reply traffic will not reach the VXLAN device. It will be dropped by the stack before, because the outer destination IP is unknown. To solve 2., we have two choices. Either we perform decapsulation ourselves in a BPF program attached at veth1 (the base device for the VXLAN device), or we switch the outer destination address when we receive the packet at veth1, such that the stack properly demultiplexes it to the VXLAN device afterward. This commit implements the second approach, where we switch the outer destination address from the unassigned IP address to the assigned one, only for VXLAN traffic ingressing veth1. Then, at the vxlan device, the BPF program that checks the output of bpf_skb_get_tunnel_key needs to be updated as the expected local IP address is now the unassigned one. 
Signed-off-by: Paul Chaignon Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/4addde76eaf3477a58975bef15ed2788c44e5f55.1658759380.git.paul@isovalent.com --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 17 ++++- .../testing/selftests/bpf/progs/test_tunnel_kern.c | 80 +++++++++++++++++++--- 2 files changed, 86 insertions(+), 11 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 3bba4a2a0530..eea274110267 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -82,6 +82,7 @@ #define MAC_TUNL_DEV0 "52:54:00:d9:01:00" #define MAC_TUNL_DEV1 "52:54:00:d9:02:00" +#define MAC_VETH1 "52:54:00:d9:03:00" #define VXLAN_TUNL_DEV0 "vxlan00" #define VXLAN_TUNL_DEV1 "vxlan11" @@ -108,10 +109,9 @@ static int config_device(void) { SYS("ip netns add at_ns0"); - SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); SYS("ip link set veth0 netns at_ns0"); SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); SYS("ip link set dev veth1 up mtu 1500"); SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); @@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void) VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ SYS("ip link add dev %s type vxlan external gbp dstport 4789", @@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void) if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) goto done; + /* load and attach bpf prog to veth dev tc hook point */ + 
ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + goto done; + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 17f2f325b3f3..df0673c4ecbe 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -14,15 +14,24 @@ #include #include #include +#include #include #include #include #include +#include #include #include #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) +#define VXLAN_UDP_PORT 4789 + +/* Only IPv4 address assigned to veth1. 
+ * 172.16.1.200 + */ +#define ASSIGNED_ADDR_VETH1 0xac1001c8 + struct geneve_opt { __be16 opt_class; __u8 type; @@ -33,6 +42,11 @@ struct geneve_opt { __u8 opt_data[8]; /* hard-coded to 8 byte */ }; +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct vxlan_metadata { __u32 gbp; }; @@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 orig_daddr; __u32 index = 0; - __u32 *local_ip = NULL; - - local_ip = bpf_map_lookup_elem(&local_ip_map, &index); - if (!local_ip) { - log_err(ret); - return TC_ACT_SHOT; - } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { @@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_SHOT; } - if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) { bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", key.tunnel_id, key.local_ipv4, key.remote_ipv4, md.gbp); - bpf_printk("local_ip 0x%x\n", *local_ip); log_err(ret); return TC_ACT_SHOT; } @@ -402,6 +409,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("tc") +int veth_set_outer_dst(struct __sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(long)skb->data; + __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1); + void *data_end = (void *)(long)skb->data_end; + struct udphdr *udph; + struct iphdr *iph; + __u32 index = 0; + int ret = 0; + int shrink; + __s64 csum; + + if ((void *)eth + sizeof(*eth) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + iph = (struct iphdr *)(eth + 1); + if ((void *)iph + sizeof(*iph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (iph->protocol != IPPROTO_UDP) + return TC_ACT_OK; + + udph = (struct udphdr *)(iph + 1); + if ((void *)udph + sizeof(*udph) > data_end) { + log_err(ret); + return 
TC_ACT_SHOT; + } + if (udph->dest != bpf_htons(VXLAN_UDP_PORT)) + return TC_ACT_OK; + + if (iph->daddr != assigned_ip) { + csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip, + sizeof(__u32), 0); + if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr), + &assigned_ip, sizeof(__u32), 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), + 0, csum, 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + bpf_skb_change_type(skb, PACKET_HOST); + } + return TC_ACT_OK; +} + SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { -- cgit v1.2.3-59-g8ed1b From d295daf505758f9a0e4d05f4ee3bfdfb4192c18f Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 26 Jul 2022 15:40:08 +0200 Subject: selftests/bpf: Attach to socketcall() in test_probe_user test_probe_user fails on architectures where libc uses socketcall(SYS_CONNECT) instead of connect(). Fix by attaching to socketcall as well. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20220726134008.256968-3-iii@linux.ibm.com --- .../testing/selftests/bpf/prog_tests/probe_user.c | 35 +++++++++++++++------- .../testing/selftests/bpf/progs/test_probe_user.c | 29 ++++++++++++++++-- 2 files changed, 51 insertions(+), 13 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index abf890d066eb..34dbd2adc157 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -4,25 +4,35 @@ /* TODO: corrupts other tests uses connect() */ void serial_test_probe_user(void) { - const char *prog_name = "handle_sys_connect"; + static const char *const prog_names[] = { + "handle_sys_connect", +#if defined(__s390x__) + "handle_sys_socketcall", +#endif + }; + enum { prog_count = ARRAY_SIZE(prog_names) }; 
const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; struct sockaddr curr, orig, tmp; struct sockaddr_in *in = (struct sockaddr_in *)&curr; - struct bpf_link *kprobe_link = NULL; - struct bpf_program *kprobe_prog; + struct bpf_link *kprobe_links[prog_count] = {}; + struct bpf_program *kprobe_progs[prog_count]; struct bpf_object *obj; static const int zero = 0; + size_t i; obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' not found\n", prog_name)) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_progs[i] = + bpf_object__find_program_by_name(obj, prog_names[i]); + if (CHECK(!kprobe_progs[i], "find_probe", + "prog '%s' not found\n", prog_names[i])) + goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -33,9 +43,11 @@ void serial_test_probe_user(void) "err %d\n", results_map_fd)) goto cleanup; - kprobe_link = bpf_program__attach(kprobe_prog); - if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_links[i] = bpf_program__attach(kprobe_progs[i]); + if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe")) + goto cleanup; + } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; @@ -69,6 +81,7 @@ void serial_test_probe_user(void) inet_ntoa(in->sin_addr), ntohs(in->sin_port))) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); + for (i = 0; i < prog_count; i++) + bpf_link__destroy(kprobe_links[i]); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 8e1495008e4d..a8e501af9604 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -7,8 
+7,7 @@ static struct sockaddr_in old; -SEC("ksyscall/connect") -int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int addrlen) +static int handle_sys_connect_common(struct sockaddr_in *uservaddr) { struct sockaddr_in new; @@ -19,4 +18,30 @@ int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, int return 0; } +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, + int addrlen) +{ + return handle_sys_connect_common(uservaddr); +} + +#if defined(bpf_target_s390) +#ifndef SYS_CONNECT +#define SYS_CONNECT 3 +#endif + +SEC("ksyscall/socketcall") +int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args) +{ + if (call == SYS_CONNECT) { + struct sockaddr_in *uservaddr; + + bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]); + return handle_sys_connect_common(uservaddr); + } + + return 0; +} +#endif + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3-59-g8ed1b From e96c8da380391cca7889a4fd4b09c8fbc14aeef8 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jul 2022 10:29:24 +0200 Subject: selftests: mlxsw: Check line card info on provisioned line card Once line card is provisioned, check if HW revision and INI version are exposed on associated nested auxiliary device. 
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/devlink_linecard.sh | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh index 08a922d8b86a..ca4e9b08a105 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -84,6 +84,13 @@ lc_wait_until_port_count_is() busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc" } +lc_nested_devlink_dev_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink" +} + PROV_UNPROV_TIMEOUT=8000 # ms POST_PROV_ACT_TIMEOUT=2000 # ms PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms @@ -191,12 +198,30 @@ ports_check() check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)" } +lc_dev_info_provisioned_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_hw_revision + local running_ini_version + + fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.fixed."hw.revision"') + check_err $? "Failed to get linecard $lc fixed.hw.revision" + log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" + running_ini_version=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.running."ini.version"') + check_err $? "Failed to get linecard $lc running.ini.version" + log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" +} + provision_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT supported_types_check $lc @@ -207,6 +232,11 @@ provision_test() fi provision_one $lc $LC_16X100G_TYPE ports_check $lc $LC_16X100G_PORT_COUNT + + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? 
"Failed to get nested devlink handle of linecard $lc" + lc_dev_info_provisioned_check $lc $nested_devlink_dev + log_test "Provision" } -- cgit v1.2.3-59-g8ed1b From 949c84f05eb65b0a41f7f5c62ee9ffc5d8e39c89 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jul 2022 10:29:25 +0200 Subject: selftests: mlxsw: Check line card info on activated line card Once line card is activated, check the FW version and PSID are exposed. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/devlink_linecard.sh | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh index ca4e9b08a105..224ca3695c89 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -250,12 +250,32 @@ interface_check() setup_wait } +lc_dev_info_active_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_device_fw_psid + local running_device_fw + + fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.fixed" | \ + jq -e -r '."fw.psid"') + check_err $? "Failed to get linecard $lc fixed fw PSID" + log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\"" + + running_device_fw=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.running.fw") + check_err $? "Failed to get linecard $lc running.fw.version" + log_info "Linecard $lc running.fw: \"$running_device_fw\"" +} + activation_16x100G_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT type=$LC_16X100G_TYPE @@ -268,6 +288,10 @@ activation_16x100G_test() interface_check + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? 
"Failed to get nested devlink handle of linecard $lc" + lc_dev_info_active_check $lc $nested_devlink_dev + log_test "Activation 16x100G" } -- cgit v1.2.3-59-g8ed1b From 060468f0ddbbd3396944ad234077e44af1841efc Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Mon, 25 Jul 2022 10:01:24 +0800 Subject: selftests: net: Fix typo 'the the' in comment Replace 'the the' with 'the' in the comment. Signed-off-by: Slark Xiao Link: https://lore.kernel.org/r/20220725020124.5760-1-slark_xiao@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index 0727e2012b68..43469c7de118 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -525,7 +525,7 @@ arp_suppression() log_test "neigh_suppress: on / neigh exists: yes" - # Delete the neighbour from the the SVI. A single ARP request should be + # Delete the neighbour from the SVI. A single ARP request should be # received by the remote VTEP RET=0 -- cgit v1.2.3-59-g8ed1b From aee993bbd05cede097885fa74e2627a458d3418a Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Wed, 27 Jul 2022 00:11:54 +0000 Subject: selftests/bpf: Sort configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change makes sure to sort the existing minimal kernel configuration containing options required for running BPF selftests alphabetically. Doing so will make it easier to diff it against other configurations, which in turn helps with maintaining disjunct config files that build on top of each other. It also helped identify the CONFIG_IPV6_GRE being set twice and removes one of the occurrences. Lastly, we change NET_CLS_BPF from 'm' to 'y'. 
Having this option as 'm' will cause failures of the btf_skc_cls_ingress selftest. Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Mykola Lysenko Link: https://lore.kernel.org/bpf/20220727001156.3553701-2-deso@posteo.net --- tools/testing/selftests/bpf/config | 99 +++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 50 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c05904d631ec..fabf0c014349 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,65 +1,64 @@ +CONFIG_BLK_DEV_LOOP=y CONFIG_BPF=y -CONFIG_BPF_SYSCALL=y -CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y -CONFIG_TEST_BPF=m +CONFIG_BPF_JIT=y +CONFIG_BPF_LIRC_MODE2=y +CONFIG_BPF_LSM=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_NETDEVSIM=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_INGRESS=y -CONFIG_NET_IPIP=y -CONFIG_IPV6=y -CONFIG_NET_IPGRE_DEMUX=y -CONFIG_NET_IPGRE=y -CONFIG_IPV6_GRE=y -CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m -CONFIG_VXLAN=y -CONFIG_GENEVE=y -CONFIG_NET_CLS_FLOWER=m -CONFIG_LWTUNNEL=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_XDP_SOCKETS=y +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_DYNAMIC_FTRACE=y +CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y -CONFIG_IPV6_TUNNEL=y +CONFIG_FUNCTION_TRACER=y +CONFIG_GENEVE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IMA=y +CONFIG_IMA_READ_POLICY=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IPV6=y +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m CONFIG_IPV6_GRE=y CONFIG_IPV6_SEG6_BPF=y +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=y +CONFIG_LIRC=y +CONFIG_LWTUNNEL=y +CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPTCP=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_FOU=m 
CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_MPLS=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_IPV6_SIT=m -CONFIG_BPF_JIT=y -CONFIG_BPF_LSM=y -CONFIG_SECURITY=y -CONFIG_RC_CORE=y -CONFIG_LIRC=y -CONFIG_BPF_LIRC_MODE2=y -CONFIG_IMA=y -CONFIG_SECURITYFS=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_READ_POLICY=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_FUNCTION_TRACER=y -CONFIG_DYNAMIC_FTRACE=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCHED=y +CONFIG_NETDEVSIM=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NF_CONNTRACK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y -CONFIG_NF_CONNTRACK=y +CONFIG_RC_CORE=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y -CONFIG_FPROBE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_MPTCP=y -CONFIG_NETFILTER_SYNPROXY=y -CONFIG_NETFILTER_XT_TARGET_CT=y -CONFIG_NETFILTER_XT_MATCH_STATE=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_SYNPROXY=y -CONFIG_IP_NF_RAW=y +CONFIG_VXLAN=y +CONFIG_XDP_SOCKETS=y -- cgit v1.2.3-59-g8ed1b From cbd620fc18cad51500c46e222328ca60adaa4644 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Wed, 27 Jul 2022 00:11:55 +0000 Subject: selftests/bpf: Copy over libbpf configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change integrates libbpf maintained configurations and black/white lists [0] into the repository, co-located with the BPF selftests themselves. We minimize the kernel configurations to keep future updates as small as possible [1]. Furthermore, we make both kernel configurations build on top of the existing configuration tools/testing/selftests/bpf/config (to be concatenated before build). Lastly, we replaced the terms blacklist & whitelist with denylist and allowlist, respectively. 
[0] https://github.com/libbpf/libbpf/tree/20f03302350a4143825cedcbd210c4d7112c1898/travis-ci/vmtest/configs [1] https://lore.kernel.org/bpf/20220712212124.3180314-1-deso@posteo.net/T/#m30a53648352ed494e556ac003042a9ad0a8f98c6 Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Mykola Lysenko Link: https://lore.kernel.org/bpf/20220727001156.3553701-3-deso@posteo.net --- tools/testing/selftests/bpf/DENYLIST | 6 + tools/testing/selftests/bpf/DENYLIST.s390x | 67 ++++++++ tools/testing/selftests/bpf/config.s390x | 147 +++++++++++++++++ tools/testing/selftests/bpf/config.x86_64 | 251 +++++++++++++++++++++++++++++ 4 files changed, 471 insertions(+) create mode 100644 tools/testing/selftests/bpf/DENYLIST create mode 100644 tools/testing/selftests/bpf/DENYLIST.s390x create mode 100644 tools/testing/selftests/bpf/config.s390x create mode 100644 tools/testing/selftests/bpf/config.x86_64 (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST new file mode 100644 index 000000000000..939de574fc7f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST @@ -0,0 +1,6 @@ +# TEMPORARY +get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge +stacktrace_build_id_nmi +stacktrace_build_id +task_fd_query_rawtp +varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x new file mode 100644 index 000000000000..e33cab34d22f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -0,0 +1,67 @@ +# TEMPORARY +atomics # attach(add): actual -524 <= expected 0 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) +bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) 
+bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_loop # attaches to __x64_sys_nanosleep +bpf_mod_race # BPF trampoline +bpf_nf # JIT does not support calling kernel function +core_read_macros # unknown func bpf_probe_read#4 (overlapping) +d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) +fentry_fexit # fentry attach failed: -524 (trampoline) +fentry_test # fentry_first_attach unexpected error: -524 (trampoline) +fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) +fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) +fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) +fexit_test # fexit_first_attach unexpected error: -524 (trampoline) +get_func_args_test # trampoline +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) +get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) +ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) +ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +modify_return # modify_return attach failed: -524 (trampoline) +module_attach # skel_attach skeleton attach failed: -524 (trampoline) +mptcp +kprobe_multi_test # relies on fentry +netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) +probe_user # check_kprobe_res wrong kprobe res from probe read (?) +recursion # skel_attach unexpected error: -524 (trampoline) +ringbuf # skel_load skeleton load failed (?) +sk_assign # Can't read on server: Invalid argument (?) 
+sk_lookup # endianness problem +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) +socket_cookie # prog_attach unexpected error: -524 (trampoline) +stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) +tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) +test_bpffs # bpffs test failed 255 (iterator) +test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) +test_ima # failed to auto-attach program 'ima': -524 (trampoline) +test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) +test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) +test_overhead # attach_fentry unexpected error: -524 (trampoline) +test_profiler # unknown func bpf_probe_read_str#45 (overlapping) +timer # failed to auto-attach program 'test1': -524 (trampoline) +timer_crash # trampoline +timer_mim # failed to auto-attach program 'test1': -524 (trampoline) +trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) +trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) +trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) +vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) +xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) 
+xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +map_kptr # failed to open_and_load program: -524 (trampoline) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) +send_signal # intermittently fails to receive signal +select_reuseport # intermittently fails on new s390x setup +xdp_synproxy # JIT does not support calling kernel function (kfunc) +unpriv_bpf_disabled # fentry diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x new file mode 100644 index 000000000000..f8a7a258a718 --- /dev/null +++ b/tools/testing/selftests/bpf/config.s390x @@ -0,0 +1,147 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FANOTIFY=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KRETPROBES=y 
+CONFIG_HAVE_MARCH_Z10_FEATURES=y +CONFIG_HAVE_MARCH_Z196_FEATURES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MARCH_Z196=y +CONFIG_MARCH_Z196_TUNE=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NF_TABLES=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SECURITY_NETWORK=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y 
+CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 new file mode 100644 index 000000000000..f0859a1d37ab --- /dev/null +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -0,0 +1,251 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_AGP_SIS=y +CONFIG_AGP_VIA=y +CONFIG_AMIGA_PARTITION=y +CONFIG_AUDIT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTTIME_TRACING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_KPROBE_OVERRIDE=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_BSD_DISKLABEL=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPUSETS=y +CONFIG_CRC_T10DIF=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_XXHASH=y +CONFIG_DCB=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEFAULT_FQ_CODEL=y +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y 
+CONFIG_DMA_CMA=y +CONFIG_DNS_RESOLVER=y +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FB=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_VESA=y +CONFIG_FONT_8x16=y +CONFIG_FONT_MINI_4x6=y +CONFIG_FONTS=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_GART_IOMMU=y +CONFIG_GENERIC_PHY=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HID_A4TECH=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KYE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HPET=y +CONFIG_HUGETLBFS=y +CONFIG_HWPOISON_INJECT=y +CONFIG_HZ_1000=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_INTEL_POWERCLAMP=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IRQ_POLL=y +CONFIG_JUMP_LABEL=y +CONFIG_KARMA_PARTITION=y +CONFIG_KEXEC=y +CONFIG_KPROBES=y +CONFIG_KSM=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_LOG_BUF_SHIFT=21 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 +CONFIG_LOGO=y +CONFIG_LSM="selinux,bpf,integrity" +CONFIG_MAC_PARTITION=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MCORE2=y +CONFIG_MEMCG=y +CONFIG_MEMORY_FAILURE=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_MODULE_SIG=y 
+CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_SCH_DEFAULT=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETLABEL=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NO_HZ=y +CONFIG_NR_CPUS=128 +CONFIG_NUMA=y +CONFIG_NUMA_BALANCING=y +CONFIG_NVMEM=y +CONFIG_OSF_PARTITION=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_SCHEDSTATS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIO_LIBPS2=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_SYNC_FILE=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_MD5SIG=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y 
+CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_USER_NS=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_VETH=y +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_CPUID=y +CONFIG_X86_MSR=y +CONFIG_X86_POWERNOW_K8=y +CONFIG_XDP_SOCKETS_DIAG=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_USER=y +CONFIG_ZEROPLUS_FF=y -- cgit v1.2.3-59-g8ed1b From 40b09653b1977c9630f24c9ca17322d5b38f1ca5 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Wed, 27 Jul 2022 00:11:56 +0000 Subject: selftests/bpf: Adjust vmtest.sh to use local kernel configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far the vmtest.sh script, which can be used as a convenient way to run bpf selftests, has obtained the kernel config safe to use for testing from the libbpf/libbpf GitHub repository [0]. Given that we now have included this configuration into this very repository, we can just consume it from here as well, eliminating the necessity of remote accesses. With this change we adjust the logic in the script to use the configuration from below tools/testing/selftests/bpf/configs/ instead of pulling it over the network. 
[0] https://github.com/libbpf/libbpf Signed-off-by: Daniel Müller Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Mykola Lysenko Link: https://lore.kernel.org/bpf/20220727001156.3553701-4-deso@posteo.net --- tools/testing/selftests/bpf/vmtest.sh | 53 ++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 19 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index e0bb04a97e10..b86ae4a2e5c5 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -30,8 +30,7 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" @@ -269,26 +268,42 @@ is_rel_path() [[ ${path:0:1} != "/" ]] } +do_update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + rm -f "$kconfig_file" 2> /dev/null + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + cat "$kconfig_src" >> "$kconfig_file" + done +} + update_kconfig() { - local kconfig_file="$1" - local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}" - # Github does not return the "last-modified" header when retrieving the - # raw contents of the file. Use the API call to get the last-modified - # time of the kernel config and only update the config if it has been - # updated after the previously cached config was created. 
This avoids - # unnecessarily compiling the kernel and selftests. - if [[ -f "${kconfig_file}" ]]; then - local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \ - grep "last-modified" | awk -F ': ' '{print $2}')" - local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")" - local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")" + local kernel_checkout="$1" + local kconfig_file="$2" - if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then - ${update_command} - fi + if [[ -f "${kconfig_file}" ]]; then + local local_modified="$(stat -c %Y "${kconfig_file}")" + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + local src_modified="$(stat -c %Y "${kconfig_src}")" + # Only update the config if it has been updated after the + # previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ "${src_modified}" -gt "${local_modified}" ]]; then + do_update_kconfig "$kernel_checkout" "$kconfig_file" + # Once we have found one outdated configuration + # there is no need to check other ones. + break + fi + done else - ${update_command} + do_update_kconfig "$kernel_checkout" "$kconfig_file" fi } @@ -372,7 +387,7 @@ main() mkdir -p "${OUTPUT_DIR}" mkdir -p "${mount_dir}" - update_kconfig "${kconfig_file}" + update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" -- cgit v1.2.3-59-g8ed1b From 86c591fb9142e772d3ba26b601f4a49123e7079c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Jul 2022 20:15:21 -0700 Subject: selftests: tls: handful of memrnd() and length checks Add a handful of memory randomizations and precise length checks. Nothing is really broken here, I did this to increase confidence when debugging. It does fix a GCC warning, tho. 
Apparently GCC recognizes that memory needs to be initialized for send() but does not recognize that for write(). Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4ecbac197c46..2cbb12736596 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -644,12 +644,14 @@ TEST_F(tls, splice_from_pipe2) int p2[2]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); ASSERT_GE(pipe(p2), 0); - EXPECT_GE(write(p[1], mem_send, 8000), 0); - EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); - EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); + EXPECT_EQ(write(p[1], mem_send, 8000), 8000); + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000); + EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000); + EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000); EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -683,10 +685,12 @@ TEST_F(tls, splice_to_pipe) char mem_recv[TLS_PAYLOAD_MAX_LEN]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); - EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0); - EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0); - EXPECT_GE(read(p[0], mem_recv, send_len), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -875,6 +879,8 @@ TEST_F(tls, multiple_send_single_recv) char recv_mem[2 * 10]; char send_mem[10]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, send_len, 0), 
0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); @@ -891,6 +897,8 @@ TEST_F(tls, single_send_multiple_recv_non_align) char recv_mem[recv_len * 2]; char send_mem[total_len]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -936,10 +944,10 @@ TEST_F(tls, recv_peek) char buf[15]; EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); memset(buf, 0, sizeof(buf)); - EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); } -- cgit v1.2.3-59-g8ed1b From 6ecf206d602fafd077811b6033c183deb0c0a9c8 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 27 Jul 2022 21:16:42 +0200 Subject: selftests: net: dsa: Add a Makefile which installs the selftests Add a Makefile which takes care of installing the selftests in tools/testing/selftests/drivers/net/dsa. This can be used to install all DSA specific selftests and forwarding.config using the same approach as for the selftests in tools/testing/selftests/net/forwarding. 
Signed-off-by: Martin Blumenstingl Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20220727191642.480279-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/dsa/Makefile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/dsa/Makefile (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile new file mode 100644 index 000000000000..2a731d5c6d85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = bridge_locked_port.sh \ + bridge_mdb.sh \ + bridge_mld.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ + bridge_vlan_unaware.sh \ + local_termination.sh \ + no_forwarding.sh \ + test_bridge_fdb_stress.sh + +TEST_PROGS_EXTENDED := lib.sh + +TEST_FILES := forwarding.config + +include ../../../lib.mk -- cgit v1.2.3-59-g8ed1b From 6ab4eb5a52a734dc60e0366c66e06446a0032903 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 27 Jul 2022 20:54:07 +0200 Subject: selftests: seg6: add selftest for SRv6 H.Encaps.Red behavior This selftest is designed for testing the H.Encaps.Red behavior. It instantiates a virtual network composed of several nodes: hosts and SRv6 routers. Each node is realized using a network namespace that is properly interconnected to others through veth pairs. The test considers SRv6 routers implementing L3 VPNs leveraged by hosts for communicating with each other. Such routers make use of the SRv6 H.Encaps.Red behavior for applying SRv6 policies to L3 traffic coming from hosts. The correct execution of the behavior is verified through reachability tests carried out between hosts belonging to the same VPN. Signed-off-by: Andrea Mayer Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/srv6_hencap_red_l3vpn_test.sh | 879 +++++++++++++++++++++ 2 files changed, 880 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 80628bf8413a..7ac6ff3748ed 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -35,6 +35,7 @@ TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh +TEST_PROGS += srv6_hencap_red_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh new file mode 100755 index 000000000000..28a775654b92 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -0,0 +1,879 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer +# +# This script is designed for testing the SRv6 H.Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers advanced +# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each +# other. +# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN +# service, while hs-3 and hs-4 are connected using an IPv6 only VPN. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received +# by connected hosts, initiating the VPN tunnel. Such a behavior is an +# optimization of the SRv6 H.Encap aiming to reduce the length of the SID +# List carried in the pushed SRH. 
Specifically, the H.Encaps.Red removes +# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it +# into the IPv6 Destination Address. When a SRv6 Policy is made of only one +# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that +# SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +--- +---+ +# | | | | +# | hs-4 | | hs-3 | +# | | | | +# +--------+ +--------+ +# cafe::4 cafe::3 +# 10.0.0.4 10.0.0.3 +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. 
Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# IPv4/IPv6 VPN between hs-1 and hs-2 +# ----------------------------------- +# +# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46 +# ii.a) IPv4 traffic, SID List=fcff:2::d46 +# +# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers +# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through +# rt-2. +# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the +# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4 +# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID +# List (ii.a) consists of only one SID that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e.
from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) IPv6 traffic, SID List=fcff:1::d46 +# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46 +# +# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1. +# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers +# rt-4,rt-3,rt-1. +# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single +# SID (fcff:1::d46) inside the IPv6 DA. +# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first +# SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a) +# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a) +# +# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b) +# +# +# IPv6 VPN between hs-3 and hs-4 +# ------------------------------ +# +# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN. +# Specifically, packets generated from hs-3 and directed towards hs-4 are +# handled by rt-3 which applies the following SRv6 Policy: +# +# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46 +# +# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4. +# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the +# first SID (fcff:2::e) in the IPv6 DA. +# +# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the +# following SRv6 Policy: +# +# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46. +# +# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3. +# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the +# first SID (fcff:1::e) in the IPv6 DA. +# +# In summary: +# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c) +# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) +# + +# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly DT46_FUNC=0d46 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! 
-x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". 
+ if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" 
"${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that "dev" is dummy and the VRF is chosen + # for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${VRF_DEVNAME}" + + # Local End.DT46 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instances are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns "${nsname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${nsname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e.
cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. 
+ ip -netns "${nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. 
+ ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2 3 4"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + setup_hs 3 3 + setup_hs 4 4 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-3,rt-4 (SRv6 End behaviors) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red + setup_rt_policy_ipv6 1 2 "" 1 encap.red + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. 
+ # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv4 2 1 "" 2 encap.red + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + + # create an IPv6 VPN between hosts hs-3 and hs-4 + # the network path between hs-3 and hs-4 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-3 -> hs-4 (H.Encaps.Red) + # - rt-2 (SRv6 End Behavior) + # - rt-4 (SRv6 End.DT46 behavior) + # + # Direction hs-4 -> hs-3 (H.Encaps.Red) + # - rt-1 (SRv6 End behavior) + # - rt-3 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 4 3 "2" 4 encap.red + setup_rt_policy_ipv6 3 4 "1" 3 encap.red + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 
0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +check_and_log_hs_ipv6_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_ipv4_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 
1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}" + check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)" + + check_and_log_hs_ipv6_connectivity 3 4 + check_and_log_hs_ipv6_connectivity 4 3 +} + +host_vpn_isolation_tests() +{ + local l1="1 2" + local l2="3 4" + local tmp + local i + local j + local k + + log_section "SRv6 VPN isolation test among hosts" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation "${i}" "${j}" + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + done + + log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)" + + check_and_log_hs_ipv4_isolation 2 4 + check_and_log_hs_ipv4_isolation 4 2 +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "encap.red"; then + echo "SKIP: Missing SRv6 encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! 
-e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results -- cgit v1.2.3-59-g8ed1b From 95baa4e8fe69ae511cbbcfd0ffef010108e2ca43 Mon Sep 17 00:00:00 2001 From: Andrea Mayer Date: Wed, 27 Jul 2022 20:54:08 +0200 Subject: selftests: seg6: add selftest for SRv6 H.L2Encaps.Red behavior This selftest is designed for testing the H.L2Encaps.Red behavior. It instantiates a virtual network composed of several nodes: hosts and SRv6 routers. Each node is realized using a network namespace that is properly interconnected to others through veth pairs. The test considers SRv6 routers implementing a L2 VPN leveraged by hosts for communicating with each other. Such routers make use of the SRv6 H.L2Encaps.Red behavior for applying SRv6 policies to L2 traffic coming from hosts. The correct execution of the behavior is verified through reachability tests carried out between hosts belonging to the same VPN. Signed-off-by: Andrea Mayer Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/srv6_hl2encap_red_l2vpn_test.sh | 821 +++++++++++++++++++++ 2 files changed, 822 insertions(+) create mode 100755 tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7ac6ff3748ed..cd86d37146cc 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -36,6 +36,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += srv6_hencap_red_l3vpn_test.sh +TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh new file mode 100755 index 000000000000..cb4177d41b21 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -0,0 +1,821 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer +# +# This script is designed for testing the SRv6 H.L2Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers L2 VPN +# services to hosts, enabling them to communicate with each other. +# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service. +# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange +# full L2 frames as long as they carry IPv4/IPv6. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic +# received by connected hosts, initiating the VPN tunnel. 
Such a behavior +# is an optimization of the SRv6 H.L2Encaps aiming to reduce the +# length of the SID List carried in the pushed SRH. Specifically, the +# H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6 +# Policy) by storing it into the IPv6 Destination Address. When an SRv6 +# Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits +# the SRH altogether and pushes that SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List +# carried by the SRH; +# +# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy +# and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is +# sent over the interface connected with the destination host. +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored.
Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d2 is associated with the SRv6 End.DX2 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# L2 VPN between hs-1 and hs-2 +# ---------------------------- +# +# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) L2 traffic, SID List=fcff:2::d2 +# +# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2. +# The H.L2Encaps.Red omits the SRH altogether, since the SID List +# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2 +# +# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers +# rt-4,rt-3,rt-1. The H.L2Encaps.Red reduces the SID List in the SRH by removing +# the first SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) +# + +# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly RT2HS_DEVNAME="veth-hs" +readonly HS_VETH_NAME="veth0" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly MAC_PREFIX=00:00:00:c0:01 +readonly END_FUNC=000e +readonly DX2_FUNC=00d2 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! 
-x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". 
+ if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + 
for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy + # interface) + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${DUMMY_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behaviors instances are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule add \ + to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + else + ip -netns "${nsname}" -4 route \ + add
"${IPv4_HS_NETWORK}.${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_decap() +{ + local rt="$1" + local nsname + + nsname="$(get_rtname "${rt}")" + + # Local End.DX2 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \ + dev "${RT2HS_DEVNAME}" +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \ + dev "${HS_VETH_NAME}" nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \ + dev "${HS_VETH_NAME}" + + ip -netns "${hsname}" link set "${HS_VETH_NAME}" up + ip -netns "${hsname}" link set lo up + + ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \ + dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. 
+ ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 +} + +# set an auto-generated mac address +# args: +# $1 - name of the node (e.g.: hs-1, rt-3, etc) +# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc) +# $3 - host part of the IPv6 network address +# $4 - name of the network interface to which the generated mac address must +# be set. +set_mac_address() +{ + local nodename="$1" + local nodeid="$2" + local host="$3" + local ifname="$4" + local nsname + + nsname=$(get_nodename "${nodename}") + + ip -netns "${nsname}" link set dev "${ifname}" down + + ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \ + dev "${ifname}" + + # the IPv6 address must be set once again after the MAC address has + # been changed. + ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \ + dev "${ifname}" nodad + + ip -netns "${nsname}" link set dev "${ifname}" up +} + +set_host_l2peer() +{ + local hssrc="$1" + local hsdst="$2" + local ipprefix="$3" + local proto="$4" + local hssrc_name + local ipaddr + + hssrc_name="$(get_hsname "${hssrc}")" + + if [ "${proto}" -eq 6 ]; then + ipaddr="${ipprefix}::${hsdst}" + else + ipaddr="${ipprefix}.${hsdst}" + fi + + ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}" + + ip -netns "${hssrc_name}" neigh \ + add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \ + dev "${HS_VETH_NAME}" +} + +# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6 +# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6). 
+# args: +# $1 - source host +# $2 - SRv6 routers configured for steering tunneled traffic +# $3 - destination host +setup_l2vpn() +{ + local hssrc="$1" + local end_rts="$2" + local hsdst="$3" + local rtsrc="${hssrc}" + local rtdst="${hsdst}" + + # set fixed mac for source node and the neigh MAC address + set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 + + # we have to set the mac address of the veth-host (on ingress router) + # to the mac address of the remote peer (L2 VPN destination host). + # Otherwise, traffic coming from the source host is dropped at the + # ingress router. + set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + + # set the SRv6 Policies at the ingress router + setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 6 + setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 4 + + # set the decap behavior + setup_decap "${rtsrc}" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # create a L2 VPN between hs-1 and hs-2. 
+ # NB: currently, H.L2Encap* enables tunneling of L2 frames whose + # layer-3 is IPv4/IPv6. + # + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.L2Encaps.Red) + # - rt-2 (SRv6 End.DX2 behavior) + # + # Direction hs-2 -> hs-1 (H.L2Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DX2 behavior) + setup_l2vpn 1 "" 2 + setup_l2vpn 2 "4 3" 1 + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 
0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! 
ip route help 2>&1 | grep -qo "l2encap.red"; then + echo "SKIP: Missing SRv6 l2encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results -- cgit v1.2.3-59-g8ed1b From 40823f3ee05f7f55cbd4419062a1a388249e88da Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jul 2022 14:45:35 +0300 Subject: selftests: netdevsim: Add test cases for route deletion failure Add IPv4 and IPv6 test cases that ensure that we are not leaking a reference on the nexthop device when we are unable to delete its associated route. Without the fix in a previous patch ("netdevsim: fib: Fix reference count leak on route deletion failure") both test cases get stuck, waiting for the reference to be released from the dummy device [1][2]. [1] unregister_netdevice: waiting for dummy1 to become free. Usage count = 5 leaked reference. fib_check_nh+0x275/0x620 fib_create_info+0x237c/0x4d30 fib_table_insert+0x1dd/0x1d20 inet_rtm_newroute+0x11b/0x200 rtnetlink_rcv_msg+0x43b/0xd20 netlink_rcv_skb+0x15e/0x430 netlink_unicast+0x53b/0x800 netlink_sendmsg+0x945/0xe40 ____sys_sendmsg+0x747/0x960 ___sys_sendmsg+0x11d/0x190 __sys_sendmsg+0x118/0x1e0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd [2] unregister_netdevice: waiting for dummy1 to become free. Usage count = 5 leaked reference. 
fib6_nh_init+0xc46/0x1ca0 ip6_route_info_create+0x1167/0x19a0 ip6_route_add+0x27/0x150 inet6_rtm_newroute+0x161/0x170 rtnetlink_rcv_msg+0x43b/0xd20 netlink_rcv_skb+0x15e/0x430 netlink_unicast+0x53b/0x800 netlink_sendmsg+0x945/0xe40 ____sys_sendmsg+0x747/0x960 ___sys_sendmsg+0x11d/0x190 __sys_sendmsg+0x118/0x1e0 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Reviewed-by: David Ahern Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/netdevsim/fib.sh | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index fc794cd30389..6800de816e8b 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -16,6 +16,7 @@ ALL_TESTS=" ipv4_replay ipv4_flush ipv4_error_path + ipv4_delete_fail ipv6_add ipv6_metric ipv6_append_single @@ -29,11 +30,13 @@ ALL_TESTS=" ipv6_replay_single ipv6_replay_multipath ipv6_error_path + ipv6_delete_fail " NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh source $lib_dir/fib_offload_lib.sh @@ -157,6 +160,27 @@ ipv4_error_path() ipv4_error_path_replay } +ipv4_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 192.0.2.0/24 dev dummy1 + ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. 
+ ip -n testns1 link del dev dummy1 + + log_test "IPv4 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + ipv6_add() { fib_ipv6_add_test "testns1" @@ -304,6 +328,27 @@ ipv6_error_path() ipv6_error_path_replay } +ipv6_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 2001:db8:1::/64 dev dummy1 + ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv6 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + fib_notify_on_flag_change_set() { local notify=$1; shift -- cgit v1.2.3-59-g8ed1b From 639de43ef0dda165441af400ecb372e16b7f9354 Mon Sep 17 00:00:00 2001 From: Daniel Müller Date: Wed, 27 Jul 2022 18:29:55 +0000 Subject: selftests/bpf: Bump internal send_signal/send_signal_tracepoint timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The send_signal/send_signal_tracepoint is pretty flaky, with at least one failure in every ten runs on a few attempts I've tried it: > test_send_signal_common:PASS:pipe_c2p 0 nsec > test_send_signal_common:PASS:pipe_p2c 0 nsec > test_send_signal_common:PASS:fork 0 nsec > test_send_signal_common:PASS:skel_open_and_load 0 nsec > test_send_signal_common:PASS:skel_attach 0 nsec > test_send_signal_common:PASS:pipe_read 0 nsec > test_send_signal_common:PASS:pipe_write 0 nsec > test_send_signal_common:PASS:reading pipe 0 nsec > test_send_signal_common:PASS:reading pipe error: size 0 0 nsec > test_send_signal_common:FAIL:incorrect result unexpected incorrect result: actual 48 != expected 50 > test_send_signal_common:PASS:pipe_write 0 nsec > #139/1 send_signal/send_signal_tracepoint:FAIL The reason does not appear to be a correctness issue in the strict sense. 
Rather, we merely do not receive the signal we are waiting for within the provided timeout. Let's bump the timeout by a factor of ten. With that change I have not been able to reproduce the failure in 150+ iterations. I am also sneaking in a small simplification to the test_progs test selection logic. Signed-off-by: Daniel Müller Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220727182955.4044988-1-deso@posteo.net --- tools/testing/selftests/bpf/prog_tests/send_signal.c | 2 +- tools/testing/selftests/bpf/test_progs.c | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d71226e34c34..d63a20fbed33 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,7 +64,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 100000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) j /= i + j + 1; buf[0] = sigusr1_received ? 
'2' : '0'; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c639f2e56fc5..3561c97701f2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1604,11 +1604,8 @@ int main(int argc, char **argv) struct prog_test_def *test = &prog_test_defs[i]; test->test_num = i + 1; - if (should_run(&env.test_selector, - test->test_num, test->test_name)) - test->should_run = true; - else - test->should_run = false; + test->should_run = should_run(&env.test_selector, + test->test_num, test->test_name); if ((test->run_test == NULL && test->run_serial_test == NULL) || (test->run_test != NULL && test->run_serial_test != NULL)) { -- cgit v1.2.3-59-g8ed1b From 1995943c3f2a59d73efe8bf9b33a92d0f1812af3 Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Mon, 1 Aug 2022 14:46:15 +0200 Subject: selftests: net: fix IOAM test skip return code The ioam6.sh test script exits with an error code (1) when tests are skipped due to lack of support from userspace/kernel or not enough permissions. It should return the kselftests SKIP code instead. Reviewed-by: Justin Iurman Signed-off-by: Kleber Sacilotto de Souza Link: https://lore.kernel.org/r/20220801124615.256416-1-kleber.souza@canonical.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/ioam6.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index a2b9fad5a9a6..4ceb401da1bf 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -117,6 +117,8 @@ # | Schema Data | | # +-----------------------------------------------------------+ +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 ################################################################################ # # @@ -211,7 +213,7 @@ check_kernel_compatibility() echo "SKIP: kernel version probably too old, missing ioam support" ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - exit 1 + exit $ksft_skip fi ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ @@ -227,7 +229,7 @@ check_kernel_compatibility() "without CONFIG_IPV6_IOAM6_LWTUNNEL?" ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - exit 1 + exit $ksft_skip fi ip link del veth0 2>/dev/null || true @@ -752,20 +754,20 @@ nfailed=0 if [ "$(id -u)" -ne 0 ] then echo "SKIP: Need root privileges" - exit 1 + exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then echo "SKIP: Could not run test without ip tool" - exit 1 + exit $ksft_skip fi ip ioam &>/dev/null if [ $? = 1 ] then echo "SKIP: iproute2 too old, missing ioam command" - exit 1 + exit $ksft_skip fi check_kernel_compatibility -- cgit v1.2.3-59-g8ed1b From 2a8f91d2898edf84166976112837f4996c68f706 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 2 Aug 2022 14:56:11 +0200 Subject: wireguard: selftests: update config fragments The kernel.config and debug.config fragments in wireguard selftests mention some config symbols that have been reworked: Commit c5665868183f ("mm: kmemleak: use the memory pool for early allocations") removes the config DEBUG_KMEMLEAK_EARLY_LOG_SIZE and since then, the config's feature is available without further configuration. Commit 4675ff05de2d ("kmemcheck: rip it out") removes kmemcheck and the corresponding arch config HAVE_ARCH_KMEMCHECK. There is no need for this config. Commit 3bf195ae6037 ("netfilter: nat: merge nf_nat_ipv4,6 into nat core") removes the config NF_NAT_IPV4 and since then, the config's feature is available without further configuration. 
Commit 41a2901e7d22 ("rcu: Remove SPARSE_RCU_POINTER Kconfig option") removes the config SPARSE_RCU_POINTER and since then, the config's feature is enabled by default. Commit dfb4357da6dd ("time: Remove CONFIG_TIMER_STATS") removes the feature and config CONFIG_TIMER_STATS without any replacement. Commit 3ca17b1f3628 ("lib/ubsan: remove null-pointer checks") removes the check and config UBSAN_NULL without any replacement. Adjust the config fragments to those changes in configs. Signed-off-by: Lukas Bulwahn Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/debug.config | 5 ----- tools/testing/selftests/wireguard/qemu/kernel.config | 1 - 2 files changed, 6 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 2b321b8a96cf..9d172210e2c6 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -18,15 +18,12 @@ CONFIG_DEBUG_VM=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_HAVE_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_HAVE_ARCH_KMEMCHECK=y CONFIG_HAVE_ARCH_KASAN=y CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y CONFIG_UBSAN_SANITIZE_ALL=y -CONFIG_UBSAN_NULL=y CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_SHIRQ=y CONFIG_WQ_WATCHDOG=y @@ -35,7 +32,6 @@ CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_PREEMPT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y @@ -49,7 +45,6 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y -CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 CONFIG_RCU_TRACE=y CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config 
index bad88f4b0a03..75267bd9c8ad 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y -- cgit v1.2.3-59-g8ed1b From b438b3b8d6e6ee1359a66c508345703888e61346 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 2 Aug 2022 14:56:13 +0200 Subject: wireguard: selftests: support UML This should open up various possibilities like time travel execution, and is also just another platform to help shake out bugs. Cc: Johannes Berg Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/qemu/Makefile | 17 ++++++++++++++++- tools/testing/selftests/wireguard/qemu/arch/um.config | 3 +++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/wireguard/qemu/arch/um.config (limited to 'tools/testing') diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 9700358e4337..fda76282d34b 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -248,8 +248,13 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CM else QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) endif +else ifeq ($(ARCH),um) +CHOST := $(HOST_ARCH)-linux-musl +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +KERNEL_ARCH := um +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/um.config) else -$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, 
mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x, um) endif TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz @@ -262,7 +267,9 @@ $(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_ STRIP := $(CHOST)-strip CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) $(info Building for $(CHOST) using $(CBUILD)) +ifneq ($(ARCH),um) export CROSS_COMPILE := $(CHOST)- +endif export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) export CC := $(CHOST)-gcc CCACHE_PATH := $(shell which ccache 2>/dev/null) @@ -279,6 +286,7 @@ comma := , build: $(KERNEL_BZIMAGE) qemu: $(KERNEL_BZIMAGE) rm -f $(BUILD_PATH)/result +ifneq ($(ARCH),um) timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ -nodefaults \ -nographic \ @@ -291,6 +299,13 @@ qemu: $(KERNEL_BZIMAGE) -no-reboot \ -monitor none \ -kernel $< +else + timeout --foreground 20m $< \ + $(KERNEL_CMDLINE) \ + mem=$$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ + noreboot \ + con1=fd:51 51>$(BUILD_PATH)/result </dev/null 2>&1 | cat +endif grep -Fq success $(BUILD_PATH)/result $(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init diff --git a/tools/testing/selftests/wireguard/qemu/arch/um.config b/tools/testing/selftests/wireguard/qemu/arch/um.config new file mode 100644 index 000000000000..c8b229e0810e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/um.config @@ -0,0 +1,3 @@ +CONFIG_64BIT=y +CONFIG_CMDLINE="wg.success=tty1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 -- cgit v1.2.3-59-g8ed1b