diff options
Diffstat (limited to 'tools')
48 files changed, 4643 insertions, 256 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst new file mode 100644 index 000000000000..e3eb0eab7641 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -0,0 +1,81 @@ +================ +bpftool-perf +================ +------------------------------------------------------------------------------- +tool for inspection of perf related bpf prog attachments +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **perf** *COMMAND* + + *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** | **help** } + +PERF COMMANDS +============= + +| **bpftool** **perf { show | list }** +| **bpftool** **perf help** + +DESCRIPTION +=========== + **bpftool perf { show | list }** + List all raw_tracepoint, tracepoint, kprobe attachment in the system. + + Output will start with process id and file descriptor in that process, + followed by bpf program id, attachment information, and attachment point. + The attachment point for raw_tracepoint/tracepoint is the trace probe name. + The attachment point for k[ret]probe is either symbol name and offset, + or a kernel virtual address. + The attachment point for u[ret]probe is the file name and the file offset. + + **bpftool perf help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== + +| **# bpftool perf** + +:: + + pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0 + pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0 + pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep + pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159 + +| +| **# bpftool -j perf** + +:: + + [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \ + {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \ + {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \ + {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}] + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 564cb0d9692b..b6f5d560460d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -30,6 +30,8 @@ SYNOPSIS *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } + *PERF-COMMANDS* := { **show** | **list** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects @@ -56,3 +58,4 @@ OPTIONS SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index b301c9b315f1..7bc198d60de2 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -448,6 +448,15 @@ _bpftool() ;; esac ;; + perf) + case $command in + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 1ec852d21d44..eea7f14355f3 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -87,7 +87,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup }\n" + " OBJECT := { prog | map | cgroup | perf }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -216,6 +216,7 @@ static const struct cmd cmds[] = { { "prog", do_prog }, { "map", do_map }, { "cgroup", do_cgroup }, + { "perf", do_perf }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 6173cd997e7a..63fdb310b9a4 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -119,6 +119,7 @@ int do_prog(int argc, char **arg); int do_map(int argc, char **arg); int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); +int do_perf(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c new file mode 100644 index 000000000000..ac6b1a12c9b7 --- /dev/null +++ b/tools/bpf/bpftool/perf.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook +// Author: Yonghong Song <yhs@fb.com> + +#define _GNU_SOURCE +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <ftw.h> + +#include <bpf.h> + +#include "main.h" + +/* 0: undecided, 1: supported, 2: not supported */ +static int perf_query_supported; +static bool has_perf_query_support(void) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + char buf[256]; + int fd; + + if (perf_query_supported) + goto out; + + fd = open(bin_name, O_RDONLY); + if (fd < 0) { + p_err("perf_query_support: %s", strerror(errno)); + goto out; + } + + /* the following query will fail as no bpf attachment, + * the expected errno is ENOTSUPP + */ + errno = 0; + len = sizeof(buf); + bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + + if (errno == 524 /* ENOTSUPP */) { + perf_query_supported = 1; + goto close_fd; + } + + perf_query_supported = 2; + p_err("perf_query_support: %s", strerror(errno)); + fprintf(stderr, + "HINT: non root or kernel doesn't support TASK_FD_QUERY\n"); + +close_fd: + close(fd); +out: + return perf_query_supported == 1; +} + +static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type, + char *buf, __u64 probe_offset, __u64 probe_addr) +{ + jsonw_start_object(json_wtr); + jsonw_int_field(json_wtr, "pid", pid); + jsonw_int_field(json_wtr, "fd", fd); + jsonw_uint_field(json_wtr, "prog_id", prog_id); + switch (fd_type) { + case BPF_FD_TYPE_RAW_TRACEPOINT: + jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint"); + jsonw_string_field(json_wtr, "tracepoint", buf); + break; + case BPF_FD_TYPE_TRACEPOINT: + jsonw_string_field(json_wtr, "fd_type", "tracepoint"); + jsonw_string_field(json_wtr, "tracepoint", buf); + break; + case BPF_FD_TYPE_KPROBE: + jsonw_string_field(json_wtr, "fd_type", "kprobe"); + if (buf[0] != '\0') { + jsonw_string_field(json_wtr, "func", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + } else { + jsonw_lluint_field(json_wtr, "addr", probe_addr); + } + break; + case BPF_FD_TYPE_KRETPROBE: + jsonw_string_field(json_wtr, "fd_type", "kretprobe"); + if (buf[0] != '\0') { + jsonw_string_field(json_wtr, "func", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + } else { + jsonw_lluint_field(json_wtr, "addr", probe_addr); + } + break; + case BPF_FD_TYPE_UPROBE: + jsonw_string_field(json_wtr, "fd_type", "uprobe"); + jsonw_string_field(json_wtr, "filename", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + break; + case BPF_FD_TYPE_URETPROBE: + jsonw_string_field(json_wtr, "fd_type", "uretprobe"); + jsonw_string_field(json_wtr, "filename", buf); + jsonw_lluint_field(json_wtr, "offset", probe_offset); + break; + } + jsonw_end_object(json_wtr); +} + +static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type, + char *buf, __u64 probe_offset, __u64 probe_addr) +{ + printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id); + switch (fd_type) { + case BPF_FD_TYPE_RAW_TRACEPOINT: + printf("raw_tracepoint %s\n", buf); + break; + case BPF_FD_TYPE_TRACEPOINT: + printf("tracepoint %s\n", buf); + break; + case BPF_FD_TYPE_KPROBE: + if (buf[0] != '\0') + printf("kprobe func %s offset %llu\n", buf, + probe_offset); + else + printf("kprobe addr %llu\n", probe_addr); + break; + case BPF_FD_TYPE_KRETPROBE: + if (buf[0] != '\0') + printf("kretprobe func %s offset %llu\n", buf, + probe_offset); + else + printf("kretprobe addr %llu\n", probe_addr); + break; + case BPF_FD_TYPE_UPROBE: + printf("uprobe filename %s offset %llu\n", buf, probe_offset); + break; + case BPF_FD_TYPE_URETPROBE: + printf("uretprobe filename %s offset %llu\n", buf, + probe_offset); + break; + } +} + +static int show_proc(const char *fpath, const struct stat *sb, + int tflag, struct FTW *ftwbuf) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + int err, pid = 0, fd = 0; + const char *pch; + char buf[4096]; + + /* prefix always /proc */ + pch = fpath + 5; + if (*pch == '\0') + return 0; + + /* pid should be all numbers */ + pch++; + while (isdigit(*pch)) { + pid = pid * 10 + *pch - '0'; + pch++; + } + if (*pch == '\0') + return 0; + if (*pch != '/') + return FTW_SKIP_SUBTREE; + + /* check /proc/<pid>/fd directory */ + pch++; + if (strncmp(pch, "fd", 2)) + return FTW_SKIP_SUBTREE; + pch += 2; + if (*pch == '\0') + return 0; + if (*pch != '/') + return FTW_SKIP_SUBTREE; + + /* check /proc/<pid>/fd/<fd_num> */ + pch++; + while (isdigit(*pch)) { + fd = fd * 10 + *pch - '0'; + pch++; + } + if (*pch != '\0') + return FTW_SKIP_SUBTREE; + + /* query (pid, fd) for potential perf events */ + len = sizeof(buf); + err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type, + &probe_offset, &probe_addr); + if (err < 0) + return 0; + + if (json_output) + print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset, + probe_addr); + else + print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset, + probe_addr); + + return 0; +} + +static int do_show(int argc, char **argv) +{ + int flags = FTW_ACTIONRETVAL | FTW_PHYS; + int err = 0, nopenfd = 16; + + if (!has_perf_query_support()) + return -1; + + if (json_output) + jsonw_start_array(json_wtr); + if (nftw("/proc", show_proc, nopenfd, flags) == -1) { + p_err("%s", strerror(errno)); + err = -1; + } + if (json_output) + jsonw_end_array(json_wtr); + + return err; +} + +static int do_help(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s %s { show | list | help }\n" + "", + bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_perf(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 9bdfdf2d3fbe..39b88e760367 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -420,7 +420,11 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { + unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; + unsigned int *func_lens = NULL; + unsigned int nr_func_ksyms; + unsigned int nr_func_lens; struct dump_data dd = {}; __u32 len = sizeof(info); unsigned int buf_size; @@ -496,10 +500,34 @@ static int do_dump(int argc, char **argv) return -1; } + nr_func_ksyms = info.nr_jited_ksyms; + if (nr_func_ksyms) { + func_ksyms = malloc(nr_func_ksyms * sizeof(__u64)); + if (!func_ksyms) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + nr_func_lens = info.nr_jited_func_lens; + if (nr_func_lens) { + func_lens = malloc(nr_func_lens * sizeof(__u32)); + if (!func_lens) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + memset(&info, 0, sizeof(info)); *member_ptr = ptr_to_u64(buf); *member_len = buf_size; + info.jited_ksyms = ptr_to_u64(func_ksyms); + info.nr_jited_ksyms = nr_func_ksyms; + info.jited_func_lens = ptr_to_u64(func_lens); + info.nr_jited_func_lens = nr_func_lens; err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); @@ -513,6 +541,16 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.nr_jited_ksyms > nr_func_ksyms) { + p_err("too many addresses returned"); + goto err_free; + } + + if (info.nr_jited_func_lens > nr_func_lens) { + p_err("too many values returned"); + goto err_free; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && @@ -550,7 +588,57 @@ static int do_dump(int argc, char **argv) goto err_free; } - disasm_print_insn(buf, *member_len, opcodes, name); + if (info.nr_jited_func_lens && info.jited_func_lens) { + struct kernel_sym *sym = NULL; + char sym_name[SYM_MAX_NAME]; + unsigned char *img = buf; + __u64 *ksyms = NULL; + __u32 *lens; + __u32 i; + + if (info.nr_jited_ksyms) { + kernel_syms_load(&dd); + ksyms = (__u64 *) info.jited_ksyms; + } + + if (json_output) + jsonw_start_array(json_wtr); + + lens = (__u32 *) info.jited_func_lens; + for (i = 0; i < info.nr_jited_func_lens; i++) { + if (ksyms) { + sym = kernel_syms_search(&dd, ksyms[i]); + if (sym) + sprintf(sym_name, "%s", sym->name); + else + sprintf(sym_name, "0x%016llx", ksyms[i]); + } else { + strcpy(sym_name, "unknown"); + } + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_name(json_wtr, "name"); + jsonw_string(json_wtr, sym_name); + jsonw_name(json_wtr, "insns"); + } else { + printf("%s:\n", sym_name); + } + + disasm_print_insn(img, lens[i], opcodes, name); + img += lens[i]; + + if (json_output) + jsonw_end_object(json_wtr); + else + printf("\n"); + } + + if (json_output) + jsonw_end_array(json_wtr); + } else { + disasm_print_insn(buf, *member_len, opcodes, name); + } } else if (visual) { if (json_output) jsonw_null(json_wtr); @@ -558,6 +646,9 @@ static int do_dump(int argc, char **argv) dump_xlated_cfg(buf, *member_len); } else { kernel_syms_load(&dd); + dd.nr_jited_ksyms = info.nr_jited_ksyms; + dd.jited_ksyms = (__u64 *) info.jited_ksyms; + if (json_output) dump_xlated_json(&dd, buf, *member_len, opcodes); else @@ -566,10 +657,14 @@ static int do_dump(int argc, char **argv) } free(buf); + free(func_ksyms); + free(func_lens); return 0; err_free: free(buf); + free(func_ksyms); + free(func_lens); return -1; } diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 7a3173b76c16..b97f1da60dd1 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd) free(dd->sym_mapping); } -static struct kernel_sym *kernel_syms_search(struct dump_data *dd, - unsigned long key) +struct kernel_sym *kernel_syms_search(struct dump_data *dd, + unsigned long key) { struct kernel_sym sym = { .address = key, @@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd, unsigned long address, const struct bpf_insn *insn) { - if (sym) + if (!dd->nr_jited_ksyms) + /* Do not show address for interpreted programs */ + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "%+d", insn->off); + else if (sym) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "%+d#%s", insn->off, sym->name); else @@ -203,6 +207,10 @@ static const char *print_call(void *private_data, unsigned long address = dd->address_call_base + insn->imm; struct kernel_sym *sym; + if (insn->src_reg == BPF_PSEUDO_CALL && + (__u32) insn->imm < dd->nr_jited_ksyms) + address = dd->jited_ksyms[insn->imm]; + sym = kernel_syms_search(dd, address); if (insn->src_reg == BPF_PSEUDO_CALL) return print_call_pcrel(dd, sym, address, insn); diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index b34affa7ef2d..33d86e2b369b 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -49,11 +49,14 @@ struct dump_data { unsigned long address_call_base; struct kernel_sym *sym_mapping; __u32 sym_count; + __u64 *jited_ksyms; + __u32 nr_jited_ksyms; char scratch_buff[SYM_MAX_NAME + 8]; }; void kernel_syms_load(struct dump_data *dd); void kernel_syms_destroy(struct dump_data *dd); +struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key); void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, bool opcodes); void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d94d333a8225..671486133988 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -97,6 +97,7 @@ enum bpf_cmd { BPF_RAW_TRACEPOINT_OPEN, BPF_BTF_LOAD, BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, }; enum bpf_map_type { @@ -141,6 +142,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_RAW_TRACEPOINT, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_PROG_TYPE_LWT_SEG6LOCAL, }; enum bpf_attach_type { @@ -284,8 +286,8 @@ union bpf_attr { char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ __u32 btf_fd; /* fd pointing to a BTF type data */ - __u32 btf_key_id; /* BTF type_id of the key */ - __u32 btf_value_id; /* BTF type_id of the value */ + __u32 btf_key_type_id; /* BTF type_id of the key */ + __u32 btf_value_type_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -379,6 +381,22 @@ union bpf_attr { __u32 btf_log_size; __u32 btf_log_level; }; + + struct { + __u32 pid; /* input: pid */ + __u32 fd; /* input: fd */ + __u32 flags; /* input: flags */ + __u32 buf_len; /* input/output: buf len */ + __aligned_u64 buf; /* input/output: + * tp_name for tracepoint + * symbol for kprobe + * filename for uprobe + */ + __u32 prog_id; /* output: prod_id */ + __u32 fd_type; /* output: BPF_FD_TYPE_* */ + __u64 probe_offset; /* output: probe_offset */ + __u64 probe_addr; /* output: probe_addr */ + } task_fd_query; } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -1902,6 +1920,90 @@ union bpf_attr { * egress otherwise). This is the only flag supported for now. * Return * **SK_PASS** on success, or **SK_DROP** on error. + * + * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * Description + * Encapsulate the packet associated to *skb* within a Layer 3 + * protocol header. This header is provided in the buffer at + * address *hdr*, with *len* its size in bytes. *type* indicates + * the protocol of the header and can be one of: + * + * **BPF_LWT_ENCAP_SEG6** + * IPv6 encapsulation with Segment Routing Header + * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH, + * the IPv6 header is computed by the kernel. + * **BPF_LWT_ENCAP_SEG6_INLINE** + * Only works if *skb* contains an IPv6 packet. Insert a + * Segment Routing Header (**struct ipv6_sr_hdr**) inside + * the IPv6 header. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * Description + * Store *len* bytes from address *from* into the packet + * associated to *skb*, at *offset*. Only the flags, tag and TLVs + * inside the outermost IPv6 Segment Routing Header can be + * modified through this helper. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * Description + * Adjust the size allocated to TLVs in the outermost IPv6 + * Segment Routing Header contained in the packet associated to + * *skb*, at position *offset* by *delta* bytes. Only offsets + * after the segments are accepted. *delta* can be as well + * positive (growing) as negative (shrinking). + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * Description + * Apply an IPv6 Segment Routing action of type *action* to the + * packet associated to *skb*. Each action takes a parameter + * contained at address *param*, and of length *param_len* bytes. + * *action* can be one of: + * + * **SEG6_LOCAL_ACTION_END_X** + * End.X action: Endpoint with Layer-3 cross-connect. + * Type of *param*: **struct in6_addr**. + * **SEG6_LOCAL_ACTION_END_T** + * End.T action: Endpoint with specific IPv6 table lookup. + * Type of *param*: **int**. + * **SEG6_LOCAL_ACTION_END_B6** + * End.B6 action: Endpoint bound to an SRv6 policy. + * Type of param: **struct ipv6_sr_hdr**. + * **SEG6_LOCAL_ACTION_END_B6_ENCAP** + * End.B6.Encap action: Endpoint bound to an SRv6 + * encapsulation policy. + * Type of param: **struct ipv6_sr_hdr**. + * + * A call to this helper is susceptible to change the underlaying + * packet buffer. Therefore, at load time, all checks on pointers + * previously done by the verifier are invalidated and must be + * performed again, if the helper is used in combination with + * direct packet access. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1976,7 +2078,11 @@ union bpf_attr { FN(fib_lookup), \ FN(sock_hash_update), \ FN(msg_redirect_hash), \ - FN(sk_redirect_hash), + FN(sk_redirect_hash), \ + FN(lwt_push_encap), \ + FN(lwt_seg6_store_bytes), \ + FN(lwt_seg6_adjust_srh), \ + FN(lwt_seg6_action), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off { BPF_HDR_START_NET, }; +/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ +enum bpf_lwt_encap_mode { + BPF_LWT_ENCAP_SEG6, + BPF_LWT_ENCAP_SEG6_INLINE +}; + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2176,6 +2288,14 @@ enum sk_action { struct sk_msg_md { void *data; void *data_end; + + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; #define BPF_TAG_SIZE 8 @@ -2197,6 +2317,10 @@ struct bpf_prog_info { __u32 gpl_compatible:1; __u64 netns_dev; __u64 netns_ino; + __u32 nr_jited_ksyms; + __u32 nr_jited_func_lens; + __aligned_u64 jited_ksyms; + __aligned_u64 jited_func_lens; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2208,11 +2332,12 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; __u32 btf_id; - __u32 btf_key_id; - __u32 btf_value_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; } __attribute__((aligned(8))); struct bpf_btf_info { @@ -2450,4 +2575,13 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +enum bpf_task_fd_type { + BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_TRACEPOINT, /* tp name */ + BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */ + BPF_FD_TYPE_UPROBE, /* filename + offset */ + BPF_FD_TYPE_URETPROBE, /* filename + offset */ +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index bcb56ee47014..0b5ddbe135a4 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -12,42 +12,29 @@ struct btf_header { __u16 magic; __u8 version; __u8 flags; - - __u32 parent_label; - __u32 parent_name; + __u32 hdr_len; /* All offsets are in bytes relative to the end of this header */ - __u32 label_off; /* offset of label section */ - __u32 object_off; /* offset of data object section*/ - __u32 func_off; /* offset of function section */ __u32 type_off; /* offset of type section */ + __u32 type_len; /* length of type section */ __u32 str_off; /* offset of string section */ __u32 str_len; /* length of string section */ }; /* Max # of type identifier */ -#define BTF_MAX_TYPE 0x7fffffff +#define BTF_MAX_TYPE 0x0000ffff /* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x7fffffff +#define BTF_MAX_NAME_OFFSET 0x0000ffff /* Max # of struct/union/enum members or func args */ #define BTF_MAX_VLEN 0xffff -/* The type id is referring to a parent BTF */ -#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1) -#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE) - -/* String is in the ELF string section */ -#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1) -#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET) - struct btf_type { __u32 name_off; /* "info" bits arrangement * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused - * bits 24-28: kind (e.g. int, ptr, array...etc) - * bits 29-30: unused - * bits 31: root + * bits 24-27: kind (e.g. int, ptr, array...etc) + * bits 28-31: unused */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. @@ -62,8 +49,7 @@ struct btf_type { }; }; -#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f) -#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80)) +#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) #define BTF_KIND_UNKN 0 /* Unknown */ @@ -88,15 +74,14 @@ struct btf_type { /* BTF_KIND_INT is followed by a u32 and the following * is the 32 bits arrangement: */ -#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24) +#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24) #define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16) #define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff) /* Attributes stored in the BTF_INT_ENCODING */ -#define BTF_INT_SIGNED 0x1 -#define BTF_INT_CHAR 0x2 -#define BTF_INT_BOOL 0x4 -#define BTF_INT_VARARGS 0x8 +#define BTF_INT_SIGNED (1 << 0) +#define BTF_INT_CHAR (1 << 1) +#define BTF_INT_BOOL (1 << 2) /* BTF_KIND_ENUM is followed by multiple "struct btf_enum". * The exact number of btf_enum is stored in the vlen (of the diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 6a8a00097fd8..9ddc89dae962 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -89,8 +89,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) min(name_len, BPF_OBJ_NAME_LEN - 1)); attr.numa_node = create_attr->numa_node; attr.btf_fd = create_attr->btf_fd; - attr.btf_key_id = create_attr->btf_key_id; - attr.btf_value_id = create_attr->btf_value_id; + attr.btf_key_type_id = create_attr->btf_key_type_id; + attr.btf_value_type_id = create_attr->btf_value_type_id; attr.map_ifindex = create_attr->map_ifindex; return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); @@ -643,3 +643,26 @@ retry: return fd; } + +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr) +{ + union bpf_attr attr = {}; + int err; + + attr.task_fd_query.pid = pid; + attr.task_fd_query.fd = fd; + attr.task_fd_query.flags = flags; + attr.task_fd_query.buf = ptr_to_u64(buf); + attr.task_fd_query.buf_len = *buf_len; + + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + *buf_len = attr.task_fd_query.buf_len; + *prog_id = attr.task_fd_query.prog_id; + *fd_type = attr.task_fd_query.fd_type; + *probe_offset = attr.task_fd_query.probe_offset; + *probe_addr = attr.task_fd_query.probe_addr; + + return err; +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 15bff7728cf1..0639a30a457d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -36,8 +36,8 @@ struct bpf_create_map_attr { __u32 max_entries; __u32 numa_node; __u32 btf_fd; - __u32 btf_key_id; - __u32 btf_value_id; + __u32 btf_key_type_id; + __u32 btf_value_type_id; __u32 map_ifindex; }; @@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, int bpf_raw_tracepoint_open(const char *name, int prog_fd); int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log); +int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, + __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, + __u64 *probe_addr); #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2bac710e3194..8c54a4b6f187 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -35,9 +35,8 @@ struct btf { static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset) { - if (!BTF_STR_TBL_ELF_ID(offset) && - BTF_STR_OFFSET(offset) < btf->hdr->str_len) - return &btf->strings[BTF_STR_OFFSET(offset)]; + if (offset < btf->hdr->str_len) + return &btf->strings[offset]; else return NULL; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cbdf34a6fb93..d20411ebfa2f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -216,8 +216,8 @@ struct bpf_map { size_t offset; int map_ifindex; struct bpf_map_def def; - uint32_t btf_key_id; - uint32_t btf_value_id; + uint32_t btf_key_type_id; + uint32_t btf_value_type_id; void *priv; bpf_map_clear_priv_t clear_priv; }; @@ -1042,8 +1042,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) } if (def->key_size != key_size) { - pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n", - map->name, name, key_size, def->key_size); + pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n", + map->name, name, (unsigned int)key_size, def->key_size); return -EINVAL; } @@ -1069,13 +1069,13 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) } if (def->value_size != value_size) { - pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n", - map->name, name, value_size, def->value_size); + pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n", + map->name, name, (unsigned int)value_size, def->value_size); return -EINVAL; } - map->btf_key_id = key_id; - map->btf_value_id = value_id; + map->btf_key_type_id = key_id; + map->btf_value_type_id = value_id; return 0; } @@ -1100,24 +1100,24 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.value_size = def->value_size; create_attr.max_entries = def->max_entries; create_attr.btf_fd = 0; - create_attr.btf_key_id = 0; - create_attr.btf_value_id = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { create_attr.btf_fd = btf__fd(obj->btf); - create_attr.btf_key_id = map->btf_key_id; - create_attr.btf_value_id = map->btf_value_id; + create_attr.btf_key_type_id = map->btf_key_type_id; + create_attr.btf_value_type_id = map->btf_value_type_id; } *pfd = bpf_create_map_xattr(&create_attr); - if (*pfd < 0 && create_attr.btf_key_id) { + if (*pfd < 0 && create_attr.btf_key_type_id) { pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", map->name, strerror(errno), errno); create_attr.btf_fd = 0; - create_attr.btf_key_id = 0; - create_attr.btf_value_id = 0; - map->btf_key_id = 0; - map->btf_value_id = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; *pfd = bpf_create_map_xattr(&create_attr); } @@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_CGROUP_DEVICE: @@ -2085,14 +2086,14 @@ const char *bpf_map__name(struct bpf_map *map) return map ? map->name : NULL; } -uint32_t bpf_map__btf_key_id(const struct bpf_map *map) +uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map) { - return map ? map->btf_key_id : 0; + return map ? map->btf_key_type_id : 0; } -uint32_t bpf_map__btf_value_id(const struct bpf_map *map) +uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map) { - return map ? map->btf_value_id : 0; + return map ? map->btf_value_type_id : 0; } int bpf_map__set_priv(struct bpf_map *map, void *priv, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index cd3fd8d782c7..09976531aa74 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); int bpf_map__fd(struct bpf_map *map); const struct bpf_map_def *bpf_map__def(struct bpf_map *map); const char *bpf_map__name(struct bpf_map *map); -uint32_t bpf_map__btf_key_id(const struct bpf_map *map); -uint32_t bpf_map__btf_value_id(const struct bpf_map *map); +uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map); +uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); int bpf_map__set_priv(struct bpf_map *map, void *priv, diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 6c645eb77d42..ee820fcc29b0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -252,6 +252,13 @@ void idr_checks(void) idr_remove(&idr, 3); idr_remove(&idr, 0); + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0); + idr_remove(&idr, 1); + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i); + idr_remove(&idr, 1 << 30); + idr_destroy(&idr); + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 1eb0fa2aba92..85044448bbc7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ - test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o + test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ + test_lwt_seg6local.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \ test_xdp_meta.sh \ test_offload.py \ test_sock_addr.sh \ - test_tunnel.sh + test_tunnel.sh \ + test_lwt_seg6local.sh # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr @@ -84,7 +86,17 @@ else CPU ?= generic endif +# Get Clang's default includes on this system, as opposed to those seen by +# '-target bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ + $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 8f143dfb3700..334d3e8c5e89 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -114,6 +114,18 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) BPF_FUNC_fib_lookup; +static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr, + unsigned int len) = + (void *) BPF_FUNC_lwt_push_encap; +static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset, + void *from, unsigned int len) = + (void *) BPF_FUNC_lwt_seg6_store_bytes; +static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param, + unsigned int param_len) = + (void *) BPF_FUNC_lwt_seg6_action; +static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset, + unsigned int len) = + (void *) BPF_FUNC_lwt_seg6_adjust_srh; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 983dd25d49f4..1eefe211a4a8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index c8bceae7ec02..35064df688c1 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -113,22 +113,25 @@ static char btf_log_buf[BTF_LOG_BUF_SIZE]; static struct btf_header hdr_tmpl = { .magic = BTF_MAGIC, .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), }; struct btf_raw_test { const char *descr; const char *str_sec; const char *map_name; + const char *err_str; __u32 raw_types[MAX_NR_RAW_TYPES]; __u32 str_sec_size; enum bpf_map_type map_type; __u32 key_size; __u32 value_size; - __u32 key_id; - __u32 value_id; + __u32 key_type_id; + __u32 value_type_id; __u32 max_entries; bool btf_load_err; bool map_create_err; + int hdr_len_delta; int type_off_delta; int str_off_delta; int str_len_delta; @@ -141,8 +144,8 @@ static struct btf_raw_test raw_tests[] = { * }; * * struct A { - * int m; - * unsigned long long n; + * unsigned long long m; + * int n; * char o; * [3 bytes hole] * int p[8]; @@ -163,8 +166,8 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */ /* struct A { */ /* [5] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180), - BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/ + BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */ BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */ @@ -172,6 +175,7 @@ static struct btf_raw_test raw_tests[] = { /* } */ /* int[4][8] */ BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */ + /* enum E */ /* [7] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)), BTF_ENUM_ENC(NAME_TBD, 0), BTF_ENUM_ENC(NAME_TBD, 1), @@ -183,8 +187,8 @@ static struct btf_raw_test raw_tests[] = { .map_name = "struct_test1_map", .key_size = sizeof(int), .value_size = 180, - .key_id = 1, - .value_id = 5, + .key_type_id = 1, + .value_type_id = 5, .max_entries = 4, }, @@ -238,8 +242,8 @@ static struct btf_raw_test raw_tests[] = { .map_name = "struct_test2_map", .key_size = sizeof(int), .value_size = 68, - .key_id = 1, - .value_id = 3, + .key_type_id = 1, + .value_type_id = 3, .max_entries = 4, }, @@ -258,7 +262,7 @@ static struct btf_raw_test raw_tests[] = { /* struct A { */ /* [2] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1), BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */ - BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */ + BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */ /* } */ BTF_END_RAW, }, @@ -268,10 +272,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "size_check1_map", .key_size = sizeof(int), .value_size = 1, - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Member exceeds struct_size", }, /* Test member exeeds the size of struct @@ -301,11 +306,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "size_check2_map", .key_size = sizeof(int), .value_size = 1, - .key_id = 1, - .value_id = 3, + .key_type_id = 1, + .value_type_id = 3, .max_entries = 4, .btf_load_err = true, - + .err_str = "Member exceeds struct_size", }, /* Test member exeeds the size of struct @@ -335,10 +340,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "size_check3_map", .key_size = sizeof(int), .value_size = 1, - .key_id = 1, - .value_id = 3, + .key_type_id = 1, + .value_type_id = 3, .max_entries = 4, .btf_load_err = true, + .err_str = "Member exceeds struct_size", }, /* Test member exceeds the size of struct @@ -376,10 +382,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "size_check4_map", .key_size = sizeof(int), .value_size = 1, - .key_id = 1, - .value_id = 3, + .key_type_id = 1, + .value_type_id = 3, .max_entries = 4, .btf_load_err = true, + .err_str = "Member exceeds struct_size", }, /* typedef const void * const_void_ptr; @@ -411,8 +418,8 @@ static struct btf_raw_test raw_tests[] = { .map_name = "void_test1_map", .key_size = sizeof(int), .value_size = sizeof(void *), - .key_id = 1, - .value_id = 4, + .key_type_id = 1, + .value_type_id = 4, .max_entries = 4, }, @@ -440,10 +447,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "void_test2_map", .key_size = sizeof(int), .value_size = sizeof(void *), - .key_id = 1, - .value_id = 3, + .key_type_id = 1, + .value_type_id = 3, .max_entries = 4, .btf_load_err = true, + .err_str = "Invalid member", }, /* typedef const void * const_void_ptr; @@ -458,9 +466,9 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ + /* typedef const void * const_void_ptr */ /* [4] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* const_void_ptr[4] */ /* [4] */ + /* const_void_ptr[4] */ /* [5] */ BTF_TYPE_ARRAY_ENC(3, 1, 4), BTF_END_RAW, }, @@ -470,8 +478,8 @@ static struct btf_raw_test raw_tests[] = { .map_name = "void_test3_map", .key_size = sizeof(int), .value_size = sizeof(void *) * 4, - .key_id = 1, - .value_id = 4, + .key_type_id = 1, + .value_type_id = 4, .max_entries = 4, }, @@ -493,10 +501,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "void_test4_map", .key_size = sizeof(int), .value_size = sizeof(void *) * 4, - .key_id = 1, - .value_id = 3, + .key_type_id = 1, + .value_type_id = 3, .max_entries = 4, .btf_load_err = true, + .err_str = "Invalid elem", }, /* Array_A <------------------+ @@ -523,10 +532,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test1_map", .key_size = sizeof(int), .value_size = sizeof(sizeof(int) * 8), - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, /* typedef is _before_ the BTF type of Array_A and Array_B @@ -551,7 +561,6 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */ /* Array_B */ BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */ - BTF_END_RAW, }, .str_sec = "\0int_array\0", @@ -560,10 +569,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test2_map", .key_size = sizeof(int), .value_size = sizeof(sizeof(int) * 8), - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, /* Array_A <------------------+ @@ -582,7 +592,6 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ARRAY_ENC(3, 1, 8), /* Array_B */ /* [3] */ BTF_TYPE_ARRAY_ENC(2, 1, 8), - BTF_END_RAW, }, .str_sec = "", @@ -591,10 +600,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test3_map", .key_size = sizeof(int), .value_size = sizeof(sizeof(int) * 8), - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, /* typedef is _between_ the BTF type of Array_A and Array_B @@ -627,10 +637,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test4_map", .key_size = sizeof(int), .value_size = sizeof(sizeof(int) * 8), - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, /* typedef struct B Struct_B @@ -668,10 +679,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test5_map", .key_size = sizeof(int), .value_size = 8, - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, /* struct A { @@ -697,10 +709,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test6_map", .key_size = sizeof(int), .value_size = 8, - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, { @@ -724,10 +737,11 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test7_map", .key_size = sizeof(int), .value_size = sizeof(void *), - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, { @@ -759,34 +773,73 @@ static struct btf_raw_test raw_tests[] = { .map_name = "loop_test8_map", .key_size = sizeof(int), .value_size = sizeof(void *), - .key_id = 1, - .value_id = 2, + .key_type_id = 1, + .value_type_id = 2, .max_entries = 4, .btf_load_err = true, + .err_str = "Loop detected", }, { - .descr = "type_off == str_off", + .descr = "string section does not end with null", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), BTF_END_RAW, }, .str_sec = "\0int", + .str_sec_size = sizeof("\0int") - 1, + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid string section", +}, + +{ + .descr = "empty string section", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = 0, + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid string section", +}, + +{ + .descr = "empty type section", + .raw_types = { + BTF_END_RAW, + }, + .str_sec = "\0int", .str_sec_size = sizeof("\0int"), .map_type = BPF_MAP_TYPE_ARRAY, .map_name = "hdr_test_map", .key_size = sizeof(int), .value_size = sizeof(int), - .key_id = 1, - .value_id = 1, + .key_type_id = 1, + .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"), + .err_str = "No type found", }, { - .descr = "Unaligned type_off", + .descr = "btf_header test. Longer hdr_len", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), @@ -798,15 +851,16 @@ static struct btf_raw_test raw_tests[] = { .map_name = "hdr_test_map", .key_size = sizeof(int), .value_size = sizeof(int), - .key_id = 1, - .value_id = 1, + .key_type_id = 1, + .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .type_off_delta = 1, + .hdr_len_delta = 4, + .err_str = "Unsupported btf_header", }, { - .descr = "str_off beyonds btf size", + .descr = "btf_header test. Gap between hdr and type", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), @@ -818,15 +872,16 @@ static struct btf_raw_test raw_tests[] = { .map_name = "hdr_test_map", .key_size = sizeof(int), .value_size = sizeof(int), - .key_id = 1, - .value_id = 1, + .key_type_id = 1, + .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .str_off_delta = sizeof("\0int") + 1, + .type_off_delta = 4, + .err_str = "Unsupported section found", }, { - .descr = "str_len beyonds btf size", + .descr = "btf_header test. Gap between type and str", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), @@ -838,15 +893,16 @@ static struct btf_raw_test raw_tests[] = { .map_name = "hdr_test_map", .key_size = sizeof(int), .value_size = sizeof(int), - .key_id = 1, - .value_id = 1, + .key_type_id = 1, + .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .str_len_delta = 1, + .str_off_delta = 4, + .err_str = "Unsupported section found", }, { - .descr = "String section does not end with null", + .descr = "btf_header test. Overlap between type and str", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), @@ -858,15 +914,16 @@ static struct btf_raw_test raw_tests[] = { .map_name = "hdr_test_map", .key_size = sizeof(int), .value_size = sizeof(int), - .key_id = 1, - .value_id = 1, + .key_type_id = 1, + .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .str_len_delta = -1, + .str_off_delta = -4, + .err_str = "Section overlap found", }, { - .descr = "Empty string section", + .descr = "btf_header test. Larger BTF size", .raw_types = { /* int */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), @@ -878,11 +935,288 @@ static struct btf_raw_test raw_tests[] = { .map_name = "hdr_test_map", .key_size = sizeof(int), .value_size = sizeof(int), - .key_id = 1, - .value_id = 1, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = -4, + .err_str = "Unsupported section found", +}, + +{ + .descr = "btf_header test. Smaller BTF size", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + .str_sec = "\0int", + .str_sec_size = sizeof("\0int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "hdr_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .str_len_delta = 4, + .err_str = "Total section length too long", +}, + +{ + .descr = "array test. index_type/elem_type \"int\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type/elem_type \"const int\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 3, 16), + /* CONST type_id=1 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type \"const int:31\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int:31 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), + /* int[16] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(1, 4, 16), + /* CONST type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. elem_type \"const int:31\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int:31 */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4), + /* int[16] */ /* [3] */ + BTF_TYPE_ARRAY_ENC(4, 1, 16), + /* CONST type_id=2 */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid array of int", +}, + +{ + .descr = "array test. index_type \"void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 0, 16), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. index_type \"const void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(1, 3, 16), + /* CONST type_id=0 (void) */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid index", +}, + +{ + .descr = "array test. elem_type \"const void\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* int[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 16), + /* CONST type_id=0 (void) */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid elem", +}, + +{ + .descr = "array test. elem_type \"const void *\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void *[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 1, 16), + /* CONST type_id=4 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* void* */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "array test. index_type \"const void *\"", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + /* const void *[16] */ /* [2] */ + BTF_TYPE_ARRAY_ENC(3, 3, 16), + /* CONST type_id=4 */ /* [3] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4), + /* void* */ /* [4] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .str_len_delta = 0 - (int)sizeof("\0int"), + .err_str = "Invalid index", +}, + +{ + .descr = "int test. invalid int_data", + .raw_types = { + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4), + 0x10000000, + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid int_data", +}, + +{ + .descr = "invalid BTF_INFO", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_ENC(0, 0x10000000, 4), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_test_map", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info", }, }; /* struct btf_raw_test raw_tests[] */ @@ -951,6 +1285,7 @@ static void *btf_raw_create(const struct btf_header *hdr, memcpy(raw_btf + offset, str, str_sec_size); ret_hdr = (struct btf_header *)raw_btf; + ret_hdr->type_len = type_sec_size; ret_hdr->str_off = type_sec_size; ret_hdr->str_len = str_sec_size; @@ -981,6 +1316,7 @@ static int do_test_raw(unsigned int test_num) hdr = raw_btf; + hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta; hdr->type_off = (int)hdr->type_off + test->type_off_delta; hdr->str_off = (int)hdr->str_off + test->str_off_delta; hdr->str_len = (int)hdr->str_len + test->str_len_delta; @@ -992,8 +1328,13 @@ static int do_test_raw(unsigned int test_num) free(raw_btf); err = ((btf_fd == -1) != test->btf_load_err); - CHECK(err, "btf_fd:%d test->btf_load_err:%u", - btf_fd, test->btf_load_err); + if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", + btf_fd, test->btf_load_err) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; + } if (err || btf_fd == -1) goto done; @@ -1004,8 +1345,8 @@ static int do_test_raw(unsigned int test_num) create_attr.value_size = test->value_size; create_attr.max_entries = test->max_entries; create_attr.btf_fd = btf_fd; - create_attr.btf_key_id = test->key_id; - create_attr.btf_value_id = test->value_id; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; map_fd = bpf_create_map_xattr(&create_attr); @@ -1267,8 +1608,8 @@ static int test_btf_id(unsigned int test_num) create_attr.value_size = sizeof(unsigned int); create_attr.max_entries = 4; create_attr.btf_fd = btf_fd[0]; - create_attr.btf_key_id = 1; - create_attr.btf_value_id = 2; + create_attr.btf_key_type_id = 1; + create_attr.btf_value_type_id = 2; map_fd = bpf_create_map_xattr(&create_attr); if (CHECK(map_fd == -1, "errno:%d", errno)) { @@ -1279,10 +1620,10 @@ static int test_btf_id(unsigned int test_num) info_len = sizeof(map_info); err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); if (CHECK(err || map_info.btf_id != info[0].id || - map_info.btf_key_id != 1 || map_info.btf_value_id != 2, - "err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u", - err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id, - map_info.btf_value_id)) { + map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2, + "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u", + err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id, + map_info.btf_value_type_id)) { err = -1; goto done; } @@ -1542,10 +1883,10 @@ static int do_test_file(unsigned int test_num) goto done; } - err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0) + err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0) != test->btf_kv_notfound; - if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u", - bpf_map__btf_key_id(map), bpf_map__btf_value_id(map), + if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u", + bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map), test->btf_kv_notfound)) goto done; @@ -1615,7 +1956,7 @@ static struct btf_raw_test pprint_test = { /* 28 bits */ /* [7] */ BTF_TYPE_INT_ENC(0, 0, 0, 28, 4), /* uint8_t[8] */ /* [8] */ - BTF_TYPE_ARRAY_ENC(9, 3, 8), + BTF_TYPE_ARRAY_ENC(9, 1, 8), /* typedef unsigned char uint8_t */ /* [9] */ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* typedef unsigned short uint16_t */ /* [10] */ @@ -1654,8 +1995,8 @@ static struct btf_raw_test pprint_test = { .map_name = "pprint_test", .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), - .key_id = 3, /* unsigned int */ - .value_id = 16, /* struct pprint_mapv */ + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ .max_entries = 128 * 1024, }; @@ -1712,8 +2053,8 @@ static int test_pprint(void) create_attr.value_size = test->value_size; create_attr.max_entries = test->max_entries; create_attr.btf_fd = btf_fd; - create_attr.btf_key_id = test->key_id; - create_attr.btf_value_id = test->value_id; + create_attr.btf_key_type_id = test->key_type_id; + create_attr.btf_value_type_id = test->value_type_id; map_fd = bpf_create_map_xattr(&create_attr); if (CHECK(map_fd == -1, "errno:%d", errno)) { diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c new file mode 100644 index 000000000000..0575751bc1bc --- /dev/null +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c @@ -0,0 +1,437 @@ +#include <stddef.h> +#include <inttypes.h> +#include <errno.h> +#include <linux/seg6_local.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* Packet parsing state machine helpers. */ +#define cursor_advance(_cursor, _len) \ + ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) + +#define SR6_FLAG_ALERT (1 << 4) + +#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ + 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) +#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ + 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) +#define BPF_PACKET_HEADER __attribute__((packed)) + +struct ip6_t { + unsigned int ver:4; + unsigned int priority:8; + unsigned int flow_label:20; + unsigned short payload_len; + unsigned char next_header; + unsigned char hop_limit; + unsigned long long src_hi; + unsigned long long src_lo; + unsigned long long dst_hi; + unsigned long long dst_lo; +} BPF_PACKET_HEADER; + +struct ip6_addr_t { + unsigned long long hi; + unsigned long long lo; +} BPF_PACKET_HEADER; + +struct ip6_srh_t { + unsigned char nexthdr; + unsigned char hdrlen; + unsigned char type; + unsigned char segments_left; + unsigned char first_segment; + unsigned char flags; + unsigned short tag; + + struct ip6_addr_t segments[0]; +} BPF_PACKET_HEADER; + +struct sr6_tlv_t { + unsigned char type; + unsigned char len; + unsigned char value[0]; +} BPF_PACKET_HEADER; + +__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) +{ + void *cursor, *data_end; + struct ip6_srh_t *srh; + struct ip6_t *ip; + uint8_t *ipver; + + data_end = (void *)(long)skb->data_end; + cursor = (void *)(long)skb->data; + ipver = (uint8_t *)cursor; + + if ((void *)ipver + sizeof(*ipver) > data_end) + return NULL; + + if ((*ipver >> 4) != 6) + return NULL; + + ip = cursor_advance(cursor, sizeof(*ip)); + if ((void *)ip + sizeof(*ip) > data_end) + return NULL; + + if (ip->next_header != 43) + return NULL; + + srh = cursor_advance(cursor, sizeof(*srh)); + if ((void *)srh + sizeof(*srh) > data_end) + return NULL; + + if (srh->type != 4) + return NULL; + + return srh; +} + +__attribute__((always_inline)) +int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, + uint32_t old_pad, uint32_t pad_off) +{ + int err; + + if (new_pad != old_pad) { + err = bpf_lwt_seg6_adjust_srh(skb, pad_off, + (int) new_pad - (int) old_pad); + if (err) + return err; + } + + if (new_pad > 0) { + char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0}; + struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; + + pad_tlv->type = SR6_TLV_PADDING; + pad_tlv->len = new_pad - 2; + + err = bpf_lwt_seg6_store_bytes(skb, pad_off, + (void *)pad_tlv_buf, new_pad); + if (err) + return err; + } + + return 0; +} + +__attribute__((always_inline)) +int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, + uint32_t *tlv_off, uint32_t *pad_size, + uint32_t *pad_off) +{ + uint32_t srh_off, cur_off; + int offset_valid = 0; + int err; + + srh_off = (char *)srh - (char *)(long)skb->data; + // cur_off = end of segments, start of possible TLVs + cur_off = srh_off + sizeof(*srh) + + sizeof(struct ip6_addr_t) * (srh->first_segment + 1); + + *pad_off = 0; + + // we can only go as far as ~10 TLVs due to the BPF max stack size + #pragma clang loop unroll(full) + for (int i = 0; i < 10; i++) { + struct sr6_tlv_t tlv; + + if (cur_off == *tlv_off) + offset_valid = 1; + + if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) + break; + + err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); + if (err) + return err; + + if (tlv.type == SR6_TLV_PADDING) { + *pad_size = tlv.len + sizeof(tlv); + *pad_off = cur_off; + + if (*tlv_off == srh_off) { + *tlv_off = cur_off; + offset_valid = 1; + } + break; + + } else if (tlv.type == SR6_TLV_HMAC) { + break; + } + + cur_off += sizeof(tlv) + tlv.len; + } // we reached the padding or HMAC TLVs, or the end of the SRH + + if (*pad_off == 0) + *pad_off = cur_off; + + if (*tlv_off == -1) + *tlv_off = cur_off; + else if (!offset_valid) + return -EINVAL; + + return 0; +} + +__attribute__((always_inline)) +int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, + struct sr6_tlv_t *itlv, uint8_t tlv_size) +{ + uint32_t srh_off = (char *)srh - (char *)(long)skb->data; + uint8_t len_remaining, new_pad; + uint32_t pad_off = 0; + uint32_t pad_size = 0; + uint32_t partial_srh_len; + int err; + + if (tlv_off != -1) + tlv_off += srh_off; + + if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) + return -EINVAL; + + err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); + if (err) + return err; + + err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); + if (err) + return err; + + err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); + if (err) + return err; + + // the following can't be moved inside update_tlv_pad because the + // bpf verifier has some issues with it + pad_off += sizeof(*itlv) + itlv->len; + partial_srh_len = pad_off - srh_off; + len_remaining = partial_srh_len % 8; + new_pad = 8 - len_remaining; + + if (new_pad == 1) // cannot pad for 1 byte only + new_pad = 9; + else if (new_pad == 8) + new_pad = 0; + + return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +__attribute__((always_inline)) +int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, + uint32_t tlv_off) +{ + uint32_t srh_off = (char *)srh - (char *)(long)skb->data; + uint8_t len_remaining, new_pad; + uint32_t partial_srh_len; + uint32_t pad_off = 0; + uint32_t pad_size = 0; + struct sr6_tlv_t tlv; + int err; + + tlv_off += srh_off; + + err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); + if (err) + return err; + + err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); + if (err) + return err; + + err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); + if (err) + return err; + + pad_off -= sizeof(tlv) + tlv.len; + partial_srh_len = pad_off - srh_off; + len_remaining = partial_srh_len % 8; + new_pad = 8 - len_remaining; + if (new_pad == 1) // cannot pad for 1 byte only + new_pad = 9; + else if (new_pad == 8) + new_pad = 0; + + return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +__attribute__((always_inline)) +int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) +{ + int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + + ((srh->first_segment + 1) << 4); + struct sr6_tlv_t tlv; + + if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) + return 0; + + if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { + struct ip6_addr_t egr_addr; + + if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) + return 0; + + // check if egress TLV value is correct + if (ntohll(egr_addr.hi) == 0xfd00000000000000 && + ntohll(egr_addr.lo) == 0x4) + return 1; + } + + return 0; +} + +// This function will push a SRH with segments fd00::1, fd00::2, fd00::3, +// fd00::4 +SEC("encap_srh") +int __encap_srh(struct __sk_buff *skb) +{ + unsigned long long hi = 0xfd00000000000000; + struct ip6_addr_t *seg; + struct ip6_srh_t *srh; + char srh_buf[72]; // room for 4 segments + int err; + + srh = (struct ip6_srh_t *)srh_buf; + srh->nexthdr = 0; + srh->hdrlen = 8; + srh->type = 4; + srh->segments_left = 3; + srh->first_segment = 3; + srh->flags = 0; + srh->tag = 0; + + seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); + + #pragma clang loop unroll(full) + for (unsigned long long lo = 0; lo < 4; lo++) { + seg->lo = htonll(4 - lo); + seg->hi = htonll(hi); + seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); + } + + err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); + if (err) + return BPF_DROP; + + return BPF_REDIRECT; +} + +// Add an Egress TLV fc00::4, add the flag A, +// and apply End.X action to fc42::1 +SEC("add_egr_x") +int __add_egr_x(struct __sk_buff *skb) +{ + unsigned long long hi = 0xfc42000000000000; + unsigned long long lo = 0x1; + struct ip6_srh_t *srh = get_srh(skb); + uint8_t new_flags = SR6_FLAG_ALERT; + struct ip6_addr_t addr; + int err, offset; + + if (srh == NULL) + return BPF_DROP; + + uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; + + err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, + (struct sr6_tlv_t *)&tlv, 20); + if (err) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); + err = bpf_lwt_seg6_store_bytes(skb, offset, + (void *)&new_flags, sizeof(new_flags)); + if (err) + return BPF_DROP; + + addr.lo = htonll(lo); + addr.hi = htonll(hi); + err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, + (void *)&addr, sizeof(addr)); + if (err) + return BPF_DROP; + return BPF_REDIRECT; +} + +// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a +// simple End action +SEC("pop_egr") +int __pop_egr(struct __sk_buff *skb) +{ + struct ip6_srh_t *srh = get_srh(skb); + uint16_t new_tag = bpf_htons(2442); + uint8_t new_flags = 0; + int err, offset; + + if (srh == NULL) + return BPF_DROP; + + if (srh->flags != SR6_FLAG_ALERT) + return BPF_DROP; + + if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV + return BPF_DROP; + + if (!has_egr_tlv(skb, srh)) + return BPF_DROP; + + err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16); + if (err) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); + if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, + sizeof(new_flags))) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); + if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, + sizeof(new_tag))) + return BPF_DROP; + + return BPF_OK; +} + +// Inspect if the Egress TLV and flag have been removed, if the tag is correct, +// then apply a End.T action to reach the last segment +SEC("inspect_t") +int __inspect_t(struct __sk_buff *skb) +{ + struct ip6_srh_t *srh = get_srh(skb); + int table = 117; + int err; + + if (srh == NULL) + return BPF_DROP; + + if (srh->flags != 0) + return BPF_DROP; + + if (srh->tag != bpf_htons(2442)) + return BPF_DROP; + + if (srh->hdrlen != 8) // 4 segments + return BPF_DROP; + + err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, + (void *)&table, sizeof(table)); + + if (err) + return BPF_DROP; + + return BPF_REDIRECT; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh new file mode 100755 index 000000000000..1c77994b5e71 --- /dev/null +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# Connects 6 network namespaces through veths. +# Each NS may have different IPv6 global scope addresses : +# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6 +# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6 +# fc42::1 fd00::4 +# +# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a +# IPv6 header with a Segment Routing Header, with segments : +# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4 +# +# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions : +# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1 +# - fd00::2 : remove the TLV, change the flags, add a tag +# - fd00::3 : apply an End.T action to fd00::4, through routing table 117 +# +# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet. +# Each End.BPF action will validate the operations applied on the SRH by the +# previous BPF program in the chain, otherwise the packet is dropped. +# +# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this +# datagram can be read on NS6 when binding to fb00::6. + +TMP_FILE="/tmp/selftest_lwt_seg6local.txt" + +cleanup() +{ + if [ "$?" = "0" ]; then + echo "selftests: test_lwt_seg6local [PASS]"; + else + echo "selftests: test_lwt_seg6local [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null + ip netns del ns3 2> /dev/null + ip netns del ns4 2> /dev/null + ip netns del ns5 2> /dev/null + ip netns del ns6 2> /dev/null + rm -f $TMP_FILE +} + +set -e + +ip netns add ns1 +ip netns add ns2 +ip netns add ns3 +ip netns add ns4 +ip netns add ns5 +ip netns add ns6 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 type veth peer name veth2 +ip link add veth3 type veth peer name veth4 +ip link add veth5 type veth peer name veth6 +ip link add veth7 type veth peer name veth8 +ip link add veth9 type veth peer name veth10 + +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 +ip link set veth3 netns ns2 +ip link set veth4 netns ns3 +ip link set veth5 netns ns3 +ip link set veth6 netns ns4 +ip link set veth7 netns ns4 +ip link set veth8 netns ns5 +ip link set veth9 netns ns5 +ip link set veth10 netns ns6 + +ip netns exec ns1 ip link set dev veth1 up +ip netns exec ns2 ip link set dev veth2 up +ip netns exec ns2 ip link set dev veth3 up +ip netns exec ns3 ip link set dev veth4 up +ip netns exec ns3 ip link set dev veth5 up +ip netns exec ns4 ip link set dev veth6 up +ip netns exec ns4 ip link set dev veth7 up +ip netns exec ns5 ip link set dev veth8 up +ip netns exec ns5 ip link set dev veth9 up +ip netns exec ns6 ip link set dev veth10 up +ip netns exec ns6 ip link set dev lo up + +# All link scope addresses and routes required between veths +ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link +ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link +ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link +ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link +ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link +ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link +ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link +ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link +ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link +ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link +ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link +ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link +ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link +ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link +ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link +ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link + +ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo +ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21 + +ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2 +ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link + +ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65 +ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4 + +ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6 +ip netns exec ns4 ip -6 addr add fc42::1 dev lo +ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87 + +ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109 +ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8 + +ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo +ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo + +ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + +ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null +ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null +ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null + +ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE & +ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330" +sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment +kill -INT $! + +if [[ $(< $TMP_FILE) != "foobar" ]]; then + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 3ecf733330c1..0ef68204c84b 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1542,6 +1542,162 @@ close_prog_noerr: bpf_object__close(obj); } +static void test_task_fd_query_rawtp(void) +{ + const char *file = "./test_get_stack_rawtp.o"; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + struct bpf_object *obj; + int efd, err, prog_fd; + __u32 duration = 0; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + /* query (getpid(), efd) */ + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err, + errno)) + goto close_prog; + + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + strcmp(buf, "sys_enter") == 0; + if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n", + fd_type, buf)) + goto close_prog; + + /* test zero len */ + len = 0; + err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n", + err, errno)) + goto close_prog; + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + len == strlen("sys_enter"); + if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) + goto close_prog; + + /* test empty buffer */ + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n", + err, errno)) + goto close_prog; + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + len == strlen("sys_enter"); + if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) + goto close_prog; + + /* test smaller buffer */ + len = 3; + err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)", + "err %d errno %d\n", err, errno)) + goto close_prog; + err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT && + len == strlen("sys_enter") && + strcmp(buf, "sy") == 0; + if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static void test_task_fd_query_tp_core(const char *probe_name, + const char *tp_name) +{ + const char *file = "./test_tracepoint.o"; + int err, bytes, efd, prog_fd, pmu_fd; + struct perf_event_attr attr = {}; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + struct bpf_object *obj; + __u32 duration = 0; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) + goto close_prog; + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read", + "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, + 0 /* cpu 0 */, -1 /* group id */, + 0 /* flags */); + if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + /* query (getpid(), pmu_fd) */ + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id, + &fd_type, &probe_offset, &probe_addr); + if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name); + if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n", + fd_type, buf)) + goto close_pmu; + + close(pmu_fd); + goto close_prog_noerr; + +close_pmu: + close(pmu_fd); +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + +static void test_task_fd_query_tp(void) +{ + test_task_fd_query_tp_core("sched/sched_switch", + "sched_switch"); + test_task_fd_query_tp_core("syscalls/sys_enter_read", + "sys_enter_read"); +} + int main(void) { jit_enabled = is_jit_enabled(); @@ -1561,6 +1717,8 @@ int main(void) test_stacktrace_build_id_nmi(); test_stacktrace_map_raw_tp(); test_get_stack_raw_tp(); + test_task_fd_query_rawtp(); + test_task_fd_query_tp(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 94498eaf872e..4b4f015be217 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1686,6 +1686,121 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SK_SKB, }, { + "valid access family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access local_ip4 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access local_port in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "valid access remote_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, remote_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access local_ip6 in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, local_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "invalid 64B read of family in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid read past end of SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, local_port) + 4), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid read offset in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct sk_msg_md, family) + 1), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { "direct packet read for SK_MSG", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 8fb4fe8686e4..3868dcb63420 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -72,6 +72,18 @@ struct ksym *ksym_search(long key) return &syms[0]; } +long ksym_get_addr(const char *name) +{ + int i; + + for (i = 0; i < sym_cnt; i++) { + if (strcmp(syms[i].name, name) == 0) + return syms[i].addr; + } + + return 0; +} + static int page_size; static int page_cnt = 8; static struct perf_event_mmap_page *header; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 36d90e3b1ea9..3b4bcf7f5084 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -11,6 +11,7 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); +long ksym_get_addr(const char *name); typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index e60dddbf963c..7cb0f49efdb7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,7 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh -TEST_PROGS += udpgso_bench.sh +TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6a75a3ea44ad..7ba089b33e8b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_NET_IPVTI=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_DUMMY=y diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh new file mode 100755 index 000000000000..d4cfb6a7a086 --- /dev/null +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -0,0 +1,248 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB rules API + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +IP="ip -netns testns" + +RTABLE=100 +GW_IP4=192.51.100.2 +SRC_IP=192.51.100.3 +GW_IP6=2001:db8:1::2 +SRC_IP6=2001:db8:1::3 + +DEV_ADDR=192.51.100.1 +DEV=dummy0 + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-50s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "\n TEST: %-50s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_section() +{ + echo + echo "######################################################################" + echo "TEST SECTION: $*" + echo "######################################################################" +} + +setup() +{ + set -e + ip netns add testns + $IP link set dev lo up + + $IP link add dummy0 type dummy + $IP link set dev dummy0 up + $IP address add 198.51.100.1/24 dev dummy0 + $IP -6 address add 2001:db8:1::1/64 dev dummy0 + + set +e +} + +cleanup() +{ + $IP link del dev dummy0 &> /dev/null + ip netns del testns +} + +fib_check_iproute_support() +{ + ip rule help 2>&1 | grep -q $1 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing $1 match" + return 1 + fi + + ip route get help 2>&1 | grep -q $2 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 get route too old, missing $2 match" + return 1 + fi + + return 0 +} + +fib_rule6_del() +{ + $IP -6 rule del $1 + log_test $? 0 "rule6 del $1" +} + +fib_rule6_del_by_pref() +{ + pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP -6 rule del pref $pref +} + +fib_rule6_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP -6 rule add $match table $RTABLE + $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule6 check: $1" + + fib_rule6_del_by_pref "$match" + log_test $? 0 "rule6 del by pref: $match" +} + +fib_rule6_test() +{ + # setup the fib rule redirect route + $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink + + match="oif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP6 iif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +fib_rule4_del() +{ + $IP rule del $1 + log_test $? 0 "del $1" +} + +fib_rule4_del_by_pref() +{ + pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP rule del pref $pref +} + +fib_rule4_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP rule add $match table $RTABLE + $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule4 check: $1" + + fib_rule4_del_by_pref "$match" + log_test $? 0 "rule4 del by pref: $match" +} + +fib_rule4_test() +{ + # setup the fib rule redirect route + $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink + + match="oif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP iif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +run_fibrule_tests() +{ + log_section "IPv4 fib rule" + fib_rule4_test + log_section "IPv6 fib rule" + fib_rule6_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +# start clean +cleanup &> /dev/null +setup +run_fibrule_tests +cleanup + +exit $ret diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 9164e60d4b66..9780c5a2d73f 100755..100644 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -6,8 +6,11 @@ ret=0 -VERBOSE=${VERBOSE:=0} -PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +# all tests in this script. Can be overridden with -t option +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no IP="ip -netns testns" log_test() @@ -18,8 +21,10 @@ log_test() if [ ${rc} -eq ${expected} ]; then printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) else ret=1 + nfail=$((nfail+1)) printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo @@ -28,6 +33,13 @@ log_test() [ "$a" = "q" ] && exit 1 fi fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi } setup() @@ -563,20 +575,825 @@ fib_nexthop_test() } ################################################################################ -# +# Tests on route add and replace + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +# add route for a prefix, flushing any existing routes first +# expected to be the first step of a test +add_route6() +{ + local pfx="$1" + local nh="$2" + local out + + if [ "$VERBOSE" = "1" ]; then + echo + echo " ##################################################" + echo + fi + + run_cmd "$IP -6 ro flush ${pfx}" + [ $? -ne 0 ] && exit 1 + + out=$($IP -6 ro ls match ${pfx}) + if [ -n "$out" ]; then + echo "Failed to flush routes for prefix used for tests." + exit 1 + fi + + run_cmd "$IP -6 ro add ${pfx} ${nh}" + if [ $? -ne 0 ]; then + echo "Failed to add initial route for test." + exit 1 + fi +} + +# add initial route - used in replace route tests +add_initial_route6() +{ + add_route6 "2001:db8:104::/64" "$1" +} + +check_route6() +{ + local pfx="2001:db8:104::/64" + local expected="$1" + local out + local rc=0 + + out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + +route_cleanup() +{ + $IP li del red 2>/dev/null + $IP li del dummy1 2>/dev/null + $IP li del veth1 2>/dev/null + $IP li del veth3 2>/dev/null + + cleanup &> /dev/null +} + +route_setup() +{ + route_cleanup + setup + + [ "${VERBOSE}" = "1" ] && set -x + set -e + + $IP li add red up type vrf table 101 + $IP li add veth1 type veth peer name veth2 + $IP li add veth3 type veth peer name veth4 + + $IP li set veth1 up + $IP li set veth3 up + $IP li set veth2 vrf red up + $IP li set veth4 vrf red up + $IP li add dummy1 type dummy + $IP li set dummy1 vrf red up + + $IP -6 addr add 2001:db8:101::1/64 dev veth1 + $IP -6 addr add 2001:db8:101::2/64 dev veth2 + $IP -6 addr add 2001:db8:103::1/64 dev veth3 + $IP -6 addr add 2001:db8:103::2/64 dev veth4 + $IP -6 addr add 2001:db8:104::1/64 dev dummy1 + + $IP addr add 172.16.101.1/24 dev veth1 + $IP addr add 172.16.101.2/24 dev veth2 + $IP addr add 172.16.103.1/24 dev veth3 + $IP addr add 172.16.103.2/24 dev veth4 + $IP addr add 172.16.104.1/24 dev dummy1 + + set +ex +} + +# assumption is that basic add of a single path route works +# otherwise just adding an address on an interface is broken +ipv6_rt_add() +{ + local rc + + echo + echo "IPv6 route add / append tests" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2" + log_test $? 2 "Attempt to add duplicate route - gw" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3" + log_test $? 2 "Attempt to add duplicate route - dev only" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + log_test $? 2 "Attempt to add duplicate route - reject route" + + # iproute2 prepend only sets NLM_F_CREATE + # - adds a new route; does NOT convert existing route to ECMP + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)" + + # route append with same prefix adds a new route + # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Append nexthop to existing route - gw" + + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1" + log_test $? 0 "Append nexthop to existing route - dev only" + + # multipath route can not have a nexthop that is a reject route + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64" + log_test $? 2 "Append nexthop to existing route - reject route" + + # reject route can not be converted to multipath route + run_cmd "$IP -6 ro flush 2001:db8:104::/64" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2" + log_test $? 2 "Append nexthop to existing reject route - gw" + + run_cmd "$IP -6 ro flush 2001:db8:104::/64" + run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64" + run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3" + log_test $? 2 "Append nexthop to existing reject route - dev only" + + # insert mpath directly + add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Add multipath route" + + add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + log_test $? 2 "Attempt to add duplicate multipath route" + + # insert of a second route without append but different metric + add_route6 "2001:db8:104::/64" "via 2001:db8:101::2" + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256" + rc=$? + fi + log_test $rc 0 "Route add with different metrics" + + run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024" + rc=$? + fi + log_test $rc 0 "Route delete with metric" +} -fib_test() +ipv6_rt_replace_single() { - if [ -n "$TEST" ]; then - eval $TEST + # single path with single path + # + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Single path with single path" + + # single path with multipath + # + add_initial_route6 "nexthop via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Single path with multipath" + + # single path with reject + # + add_initial_route6 "nexthop via 2001:db8:101::2" + run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64" + check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024" + log_test $? 0 "Single path with reject route" + + # single path with single path using MULTIPATH attribute + # + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2" + check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024" + log_test $? 0 "Single path with single path via multipath attribute" + + # route replace fails - invalid nexthop + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2" + if [ $? -eq 0 ]; then + # previous command is expected to fail so if it returns 0 + # that means the test failed. + log_test 0 1 "Invalid nexthop" else - fib_unreg_test - fib_down_test - fib_carrier_test - fib_nexthop_test + check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024" + log_test $? 0 "Invalid nexthop" fi + + # replace non-existent route + # - note use of change versus replace since ip adds NLM_F_CREATE + # for replace + add_initial_route6 "via 2001:db8:101::2" + run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2" + log_test $? 2 "Single path - replace of non-existent route" +} + +ipv6_rt_replace_mpath() +{ + # multipath with multipath + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1" + log_test $? 0 "Multipath with multipath" + + # multipath with single + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3" + check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" + log_test $? 0 "Multipath with single path" + + # multipath with single + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3" + check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" + log_test $? 0 "Multipath with single path via multipath attribute" + + # multipath with reject + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64" + check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024" + log_test $? 0 "Multipath with reject route" + + # route replace fails - invalid nexthop 1 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid first nexthop" + + # route replace fails - invalid nexthop 2 + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3" + check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid second nexthop" + + # multipath non-existent route + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3" + log_test $? 2 "Multipath - replace of non-existent route" +} + +ipv6_rt_replace() +{ + echo + echo "IPv6 route replace tests" + + ipv6_rt_replace_single + ipv6_rt_replace_mpath +} + +ipv6_route_test() +{ + route_setup + + ipv6_rt_add + ipv6_rt_replace + + route_cleanup } +ip_addr_metric_check() +{ + ip addr help 2>&1 | grep -q metric + if [ $? -ne 0 ]; then + echo "iproute2 command does not support metric for addresses. Skipping test" + return 1 + fi + + return 0 +} + +ipv6_addr_metric_test() +{ + local rc + + echo + echo "IPv6 prefix route tests" + + ip_addr_metric_check || return 1 + + setup + + set -e + $IP li add dummy1 type dummy + $IP li add dummy2 type dummy + $IP li set dummy1 up + $IP li set dummy2 up + + # default entry is metric 256 + run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64" + set +e + + check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256" + log_test $? 0 "Default metric" + + set -e + run_cmd "$IP -6 addr flush dev dummy1" + run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257" + set +e + + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257" + log_test $? 0 "User specified metric on first device" + + set -e + run_cmd "$IP -6 addr flush dev dummy2" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258" + set +e + + check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258" + log_test $? 0 "User specified metric on second device" + + run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258" + rc=$? + fi + log_test $rc 0 "Delete of address on first device" + + run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259" + rc=$? + fi + log_test $rc 0 "Modify metric of address" + + # verify prefix route removed on down + run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "$IP li set dev dummy2 down" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "" + rc=$? + fi + log_test $rc 0 "Prefix route removed on link down" + + # verify prefix route re-inserted with assigned metric + run_cmd "$IP li set dev dummy2 up" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259" + rc=$? + fi + log_test $rc 0 "Prefix route with metric on link up" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + +# add route for a prefix, flushing any existing routes first +# expected to be the first step of a test +add_route() +{ + local pfx="$1" + local nh="$2" + local out + + if [ "$VERBOSE" = "1" ]; then + echo + echo " ##################################################" + echo + fi + + run_cmd "$IP ro flush ${pfx}" + [ $? -ne 0 ] && exit 1 + + out=$($IP ro ls match ${pfx}) + if [ -n "$out" ]; then + echo "Failed to flush routes for prefix used for tests." + exit 1 + fi + + run_cmd "$IP ro add ${pfx} ${nh}" + if [ $? -ne 0 ]; then + echo "Failed to add initial route for test." + exit 1 + fi +} + +# add initial route - used in replace route tests +add_initial_route() +{ + add_route "172.16.104.0/24" "$1" +} + +check_route() +{ + local pfx="172.16.104.0/24" + local expected="$1" + local out + local rc=0 + + out=$($IP ro ls match ${pfx}) + [ "${out}" = "${expected}" ] && return 0 + + if [ -z "${out}" ]; then + if [ "$VERBOSE" = "1" ]; then + printf "\nNo route entry found\n" + printf "Expected:\n" + printf " ${expected}\n" + fi + return 1 + fi + + # tricky way to convert output to 1-line without ip's + # messy '\'; this drops all extra white space + out=$(echo ${out}) + if [ "${out}" != "${expected}" ]; then + rc=1 + if [ "${VERBOSE}" = "1" ]; then + printf " Unexpected route entry. Have:\n" + printf " ${out}\n" + printf " Expected:\n" + printf " ${expected}\n\n" + fi + fi + + return $rc +} + +# assumption is that basic add of a single path route works +# otherwise just adding an address on an interface is broken +ipv4_rt_add() +{ + local rc + + echo + echo "IPv4 route add / append tests" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2" + log_test $? 2 "Attempt to add duplicate route - gw" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 dev veth3" + log_test $? 2 "Attempt to add duplicate route - dev only" + + # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + log_test $? 2 "Attempt to add duplicate route - reject route" + + # iproute2 prepend only sets NLM_F_CREATE + # - adds a new route; does NOT convert existing route to ECMP + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1" + log_test $? 0 "Add new nexthop for existing prefix" + + # route append with same prefix adds a new route + # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Append nexthop to existing route - gw" + + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append 172.16.104.0/24 dev veth3" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link" + log_test $? 0 "Append nexthop to existing route - dev only" + + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro append unreachable 172.16.104.0/24" + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24" + log_test $? 0 "Append nexthop to existing route - reject route" + + run_cmd "$IP ro flush 172.16.104.0/24" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2" + check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Append nexthop to existing reject route - gw" + + run_cmd "$IP ro flush 172.16.104.0/24" + run_cmd "$IP ro add unreachable 172.16.104.0/24" + run_cmd "$IP ro append 172.16.104.0/24 dev veth3" + check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link" + log_test $? 0 "Append nexthop to existing reject route - dev only" + + # insert mpath directly + add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "add multipath route" + + add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2" + log_test $? 2 "Attempt to add duplicate multipath route" + + # insert of a second route without append but different metric + add_route "172.16.104.0/24" "via 172.16.101.2" + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256" + rc=$? + fi + log_test $rc 0 "Route add with different metrics" + + run_cmd "$IP ro del 172.16.104.0/24 metric 512" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256" + rc=$? + fi + log_test $rc 0 "Route delete with metric" +} + +ipv4_rt_replace_single() +{ + # single path with single path + # + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Single path with single path" + + # single path with multipath + # + add_initial_route "nexthop via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Single path with multipath" + + # single path with reject + # + add_initial_route "nexthop via 172.16.101.2" + run_cmd "$IP ro replace unreachable 172.16.104.0/24" + check_route "unreachable 172.16.104.0/24" + log_test $? 0 "Single path with reject route" + + # single path with single path using MULTIPATH attribute + # + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2" + check_route "172.16.104.0/24 via 172.16.103.2 dev veth3" + log_test $? 0 "Single path with single path via multipath attribute" + + # route replace fails - invalid nexthop + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2" + if [ $? -eq 0 ]; then + # previous command is expected to fail so if it returns 0 + # that means the test failed. + log_test 0 1 "Invalid nexthop" + else + check_route "172.16.104.0/24 via 172.16.101.2 dev veth1" + log_test $? 0 "Invalid nexthop" + fi + + # replace non-existent route + # - note use of change versus replace since ip adds NLM_F_CREATE + # for replace + add_initial_route "via 172.16.101.2" + run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2" + log_test $? 2 "Single path - replace of non-existent route" +} + +ipv4_rt_replace_mpath() +{ + # multipath with multipath + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1" + log_test $? 0 "Multipath with multipath" + + # multipath with single + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3" + check_route "172.16.104.0/24 via 172.16.101.3 dev veth1" + log_test $? 0 "Multipath with single path" + + # multipath with single + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3" + check_route "172.16.104.0/24 via 172.16.101.3 dev veth1" + log_test $? 0 "Multipath with single path via multipath attribute" + + # multipath with reject + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace unreachable 172.16.104.0/24" + check_route "unreachable 172.16.104.0/24" + log_test $? 0 "Multipath with reject route" + + # route replace fails - invalid nexthop 1 + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid first nexthop" + + # route replace fails - invalid nexthop 2 + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3" + check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + log_test $? 0 "Multipath - invalid second nexthop" + + # multipath non-existent route + add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2" + run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3" + log_test $? 2 "Multipath - replace of non-existent route" +} + +ipv4_rt_replace() +{ + echo + echo "IPv4 route replace tests" + + ipv4_rt_replace_single + ipv4_rt_replace_mpath +} + +ipv4_route_test() +{ + route_setup + + ipv4_rt_add + ipv4_rt_replace + + route_cleanup +} + +ipv4_addr_metric_test() +{ + local rc + + echo + echo "IPv4 prefix route tests" + + ip_addr_metric_check || return 1 + + setup + + set -e + $IP li add dummy1 type dummy + $IP li add dummy2 type dummy + $IP li set dummy1 up + $IP li set dummy2 up + + # default entry is metric 256 + run_cmd "$IP addr add dev dummy1 172.16.104.1/24" + run_cmd "$IP addr add dev dummy2 172.16.104.2/24" + set +e + + check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2" + log_test $? 0 "Default metric" + + set -e + run_cmd "$IP addr flush dev dummy1" + run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257" + set +e + + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257" + log_test $? 0 "User specified metric on first device" + + set -e + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258" + set +e + + check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258" + log_test $? 0 "User specified metric on second device" + + run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258" + rc=$? + fi + log_test $rc 0 "Delete of address on first device" + + run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259" + rc=$? + fi + log_test $rc 0 "Modify metric of address" + + # verify prefix route removed on down + run_cmd "$IP li set dev dummy2 down" + rc=$? + if [ $rc -eq 0 ]; then + check_route "" + rc=$? + fi + log_test $rc 0 "Prefix route removed on link down" + + # verify prefix route re-inserted with assigned metric + run_cmd "$IP li set dev dummy2 up" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259" + rc=$? + fi + log_test $rc 0 "Prefix route with metric on link up" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v verbose mode (show commands and output) +EOF +} + +################################################################################ +# main + +while getopts :t:pPhv o +do + case $o in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +PEER_CMD="ip netns exec ${PEER_NS}" + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" exit 0 @@ -596,6 +1413,25 @@ fi # start clean cleanup &> /dev/null -fib_test +for t in $TESTS +do + case $t in + fib_unreg_test|unregister) fib_unreg_test;; + fib_down_test|down) fib_down_test;; + fib_carrier_test|carrier) fib_carrier_test;; + fib_nexthop_test|nexthop) fib_nexthop_test;; + ipv6_route_test|ipv6_rt) ipv6_route_test;; + ipv4_route_test|ipv4_rt) ipv4_route_test;; + ipv6_addr_metric) ipv6_addr_metric_test;; + ipv4_addr_metric) ipv4_addr_metric_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi exit $ret diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 91041c49655b..7b18a53aa556 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -340,6 +340,31 @@ tunnel_destroy() ip link del dev $name } +vlan_create() +{ + local if_name=$1; shift + local vid=$1; shift + local vrf=$1; shift + local ips=("${@}") + local name=$if_name.$vid + + ip link add name $name link $if_name type vlan id $vid + if [ "$vrf" != "" ]; then + ip link set dev $name master $vrf + fi + ip link set dev $name up + __addr_add_del $name add "${ips[@]}" +} + +vlan_destroy() +{ + local if_name=$1; shift + local vid=$1; shift + local name=$if_name.$vid + + ip link del dev $name +} + master_name_get() { local if_name=$1 @@ -423,26 +448,35 @@ tc_offload_check() return 0 } -slow_path_trap_install() +trap_install() { local dev=$1; shift local direction=$1; shift - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - # For slow-path testing, we need to install a trap to get to - # slow path the packets that would otherwise be switched in HW. - tc filter add dev $dev $direction pref 1 \ - flower skip_sw action trap - fi + # For slow-path testing, we need to install a trap to get to + # slow path the packets that would otherwise be switched in HW. + tc filter add dev $dev $direction pref 1 flower skip_sw action trap } -slow_path_trap_uninstall() +trap_uninstall() { local dev=$1; shift local direction=$1; shift + tc filter del dev $dev $direction pref 1 flower skip_sw +} + +slow_path_trap_install() +{ + if [ "${tcflags/skip_hw}" != "$tcflags" ]; then + trap_install "$@" + fi +} + +slow_path_trap_uninstall() +{ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - tc filter del dev $dev $direction pref 1 flower skip_sw + trap_uninstall "$@" fi } @@ -480,6 +514,29 @@ icmp6_capture_uninstall() __icmp_capture_add_del del 100 v6 "$@" } +__vlan_capture_add_del() +{ + local add_del=$1; shift + local pref=$1; shift + local dev=$1; shift + local filter=$1; shift + + tc filter $add_del dev "$dev" ingress \ + proto 802.1q pref $pref \ + flower $filter \ + action pass +} + +vlan_capture_install() +{ + __vlan_capture_add_del add 100 "$@" +} + +vlan_capture_uninstall() +{ + __vlan_capture_add_del del 100 "$@" +} + matchall_sink_create() { local dev=$1; shift diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index c6786d1b2b96..e6fd7a18c655 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -72,7 +72,6 @@ test_span_gre_mac() RET=0 mirror_install $swp1 $direction $tundev "matchall $tcflags" - tc qdisc add dev $h3 clsact tc filter add dev $h3 ingress pref 77 prot $prot \ flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ action pass @@ -80,7 +79,6 @@ test_span_gre_mac() mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 tc filter del dev $h3 ingress pref 77 - tc qdisc del dev $h3 clsact mirror_uninstall $swp1 $direction log_test "$direction $what: envelope MAC ($tcflags)" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh new file mode 100755 index 000000000000..3bb4c2ba7b14 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the underlay route points at a +# bridge device without vlan filtering (802.1d). The device attached to that +# bridge is a VLAN. + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_stp + test_ip6gretap_stp +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + vlan_create $swp3 555 + + ip link set dev $swp3.555 master br2 + ip route add 192.0.2.130/32 dev br2 + ip -6 route add 2001:db8:2::2/128 dev br2 + + ip address add dev br2 192.0.2.129/32 + ip address add dev br2 2001:db8:2::1/128 + + vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64 +} + +cleanup() +{ + pre_cleanup + + vlan_destroy $h3 555 + ip link del dev br2 + vlan_destroy $swp3 555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_vlan_match() +{ + local tundev=$1; shift + local vlan_match=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what" + full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what" +} + +test_gretap() +{ + test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap" +} + +test_ip6gretap() +{ + test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap" +} + +test_gretap_stp() +{ + full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap" +} + +test_ip6gretap_stp() +{ + full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 50ab3462af0c..aa29d46186a8 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -12,6 +12,8 @@ ALL_TESTS=" test_tun_up test_egress_up test_remote_ip + test_tun_del + test_route_del " NUM_NETIFS=6 @@ -71,7 +73,6 @@ test_span_gre_ttl() RET=0 mirror_install $swp1 ingress $tundev "matchall $tcflags" - tc qdisc add dev $h3 clsact tc filter add dev $h3 ingress pref 77 prot $prot \ flower ip_ttl 50 action pass @@ -82,7 +83,6 @@ test_span_gre_ttl() ip link set dev $tundev type $type ttl 100 tc filter del dev $h3 ingress pref 77 - tc qdisc del dev $h3 clsact mirror_uninstall $swp1 ingress log_test "$what: TTL change ($tcflags)" @@ -159,6 +159,58 @@ test_span_gre_remote_ip() log_test "$what: remote address change ($tcflags)" } +test_span_gre_tun_del() +{ + local tundev=$1; shift + local type=$1; shift + local flags=$1; shift + local local_ip=$1; shift + local remote_ip=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + ip link del dev $tundev + fail_test_span_gre_dir $tundev ingress + + tunnel_create $tundev $type $local_ip $remote_ip \ + ttl 100 tos inherit $flags + + # Recreating the tunnel doesn't reestablish mirroring, so reinstall it + # and verify it works for the follow-up tests. + mirror_uninstall $swp1 ingress + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + mirror_uninstall $swp1 ingress + + log_test "$what: tunnel deleted ($tcflags)" +} + +test_span_gre_route_del() +{ + local tundev=$1; shift + local edev=$1; shift + local route=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + ip route del $route dev $edev + fail_test_span_gre_dir $tundev ingress + + ip route add $route dev $edev + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: underlay route removal ($tcflags)" +} + test_ttl() { test_span_gre_ttl gt4 gretap ip "mirror to gretap" @@ -183,6 +235,20 @@ test_remote_ip() test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap" } +test_tun_del() +{ + test_span_gre_tun_del gt4 gretap "" \ + 192.0.2.129 192.0.2.130 "mirror to gretap" + test_span_gre_tun_del gt6 ip6gretap allow-localremote \ + 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap" +} + +test_route_del() +{ + test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap" + test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" +} + test_all() { slow_path_trap_install $swp1 ingress diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh index 2e54407d8954..12914f40612d 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -67,6 +67,11 @@ test_span_gre_dir_acl() test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 } +fail_test_span_gre_dir_acl() +{ + fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 +} + full_test_span_gre_dir_acl() { local tundev=$1; shift @@ -83,6 +88,9 @@ full_test_span_gre_dir_acl() "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction + # Test lack of mirroring after ACL mirror is uninstalled. + fail_test_span_gre_dir_acl "$tundev" "$direction" + log_test "$direction $what ($tcflags)" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index 207ffd167dba..619b469365be 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -1,53 +1,53 @@ # SPDX-License-Identifier: GPL-2.0 -do_test_span_gre_dir_ips() +source mirror_lib.sh + +quick_test_span_gre_dir_ips() { - local expect=$1; shift local tundev=$1; shift - local direction=$1; shift - local ip1=$1; shift - local ip2=$1; shift - icmp_capture_install h3-$tundev - mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 $expect - mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 $expect - icmp_capture_uninstall h3-$tundev + do_test_span_dir_ips 10 h3-$tundev "$@" } -quick_test_span_gre_dir_ips() +fail_test_span_gre_dir_ips() { - do_test_span_gre_dir_ips 10 "$@" + local tundev=$1; shift + + do_test_span_dir_ips 0 h3-$tundev "$@" } -fail_test_span_gre_dir_ips() +test_span_gre_dir_ips() { - do_test_span_gre_dir_ips 0 "$@" + local tundev=$1; shift + + test_span_dir_ips h3-$tundev "$@" } -test_span_gre_dir_ips() +full_test_span_gre_dir_ips() { local tundev=$1; shift local direction=$1; shift local forward_type=$1; shift local backward_type=$1; shift + local what=$1; shift local ip1=$1; shift local ip2=$1; shift - quick_test_span_gre_dir_ips "$tundev" "$direction" "$ip1" "$ip2" + RET=0 - icmp_capture_install h3-$tundev "type $forward_type" - mirror_test v$h1 $ip1 $ip2 h3-$tundev 100 10 - icmp_capture_uninstall h3-$tundev + mirror_install $swp1 $direction $tundev "matchall $tcflags" + test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + mirror_uninstall $swp1 $direction - icmp_capture_install h3-$tundev "type $backward_type" - mirror_test v$h2 $ip2 $ip1 h3-$tundev 100 10 - icmp_capture_uninstall h3-$tundev + log_test "$direction $what ($tcflags)" } -full_test_span_gre_dir_ips() +full_test_span_gre_dir_vlan_ips() { local tundev=$1; shift local direction=$1; shift + local vlan_match=$1; shift local forward_type=$1; shift local backward_type=$1; shift local what=$1; shift @@ -57,8 +57,16 @@ full_test_span_gre_dir_ips() RET=0 mirror_install $swp1 $direction $tundev "matchall $tcflags" - test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \ - "$backward_type" "$ip1" "$ip2" + + test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + "$backward_type" "$ip1" "$ip2" + + tc filter add dev $h3 ingress pref 77 prot 802.1q \ + flower $vlan_match ip_proto 0x2f \ + action pass + mirror_test v$h1 $ip1 $ip2 $h3 77 10 + tc filter del dev $h3 ingress pref 77 + mirror_uninstall $swp1 $direction log_test "$direction $what ($tcflags)" @@ -83,3 +91,40 @@ full_test_span_gre_dir() { full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 } + +full_test_span_gre_dir_vlan() +{ + full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 +} + +full_test_span_gre_stp_ips() +{ + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + bridge link set dev $nbpdev state disabled + sleep 1 + fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + bridge link set dev $nbpdev state forwarding + sleep 1 + quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + + mirror_uninstall $swp1 ingress + + log_test "$what: STP state ($tcflags)" +} + +full_test_span_gre_stp() +{ + full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh index b3ceda2b4197..253419564708 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -33,29 +33,11 @@ # | | # +-------------------------------------------------------------------------+ -mirror_gre_topo_h1_create() -{ - simple_if_init $h1 192.0.2.1/28 -} - -mirror_gre_topo_h1_destroy() -{ - simple_if_fini $h1 192.0.2.1/28 -} - -mirror_gre_topo_h2_create() -{ - simple_if_init $h2 192.0.2.2/28 -} - -mirror_gre_topo_h2_destroy() -{ - simple_if_fini $h2 192.0.2.2/28 -} +source mirror_topo_lib.sh mirror_gre_topo_h3_create() { - simple_if_init $h3 + mirror_topo_h3_create tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129 ip link set h3-gt4 vrf v$h3 @@ -71,49 +53,32 @@ mirror_gre_topo_h3_destroy() tunnel_destroy h3-gt6 tunnel_destroy h3-gt4 - simple_if_fini $h3 + mirror_topo_h3_destroy } mirror_gre_topo_switch_create() { - ip link set dev $swp3 up - - ip link add name br1 type bridge vlan_filtering 1 - ip link set dev br1 up - - ip link set dev $swp1 master br1 - ip link set dev $swp1 up - - ip link set dev $swp2 master br1 - ip link set dev $swp2 up + mirror_topo_switch_create tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \ ttl 100 tos inherit tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \ ttl 100 tos inherit allow-localremote - - tc qdisc add dev $swp1 clsact } mirror_gre_topo_switch_destroy() { - tc qdisc del dev $swp1 clsact - tunnel_destroy gt6 tunnel_destroy gt4 - ip link set dev $swp1 down - ip link set dev $swp2 down - ip link del dev br1 - - ip link set dev $swp3 down + mirror_topo_switch_destroy } mirror_gre_topo_create() { - mirror_gre_topo_h1_create - mirror_gre_topo_h2_create + mirror_topo_h1_create + mirror_topo_h2_create mirror_gre_topo_h3_create mirror_gre_topo_switch_create @@ -124,6 +89,6 @@ mirror_gre_topo_destroy() mirror_gre_topo_switch_destroy mirror_gre_topo_h3_destroy - mirror_gre_topo_h2_destroy - mirror_gre_topo_h1_destroy + mirror_topo_h2_destroy + mirror_topo_h1_destroy } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh new file mode 100755 index 000000000000..88cecdb9a861 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice +# whose underlay route points at a vlan device. + +ALL_TESTS=" + test_gretap +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + ip link add name $swp3.555 link $swp3 type vlan id 555 + ip address add dev $swp3.555 192.0.2.129/32 + ip address add dev $swp3.555 2001:db8:2::1/128 + ip link set dev $swp3.555 up + + ip route add 192.0.2.130/32 dev $swp3.555 + ip -6 route add 2001:db8:2::2/128 dev $swp3.555 + + ip link add name $h3.555 link $h3 type vlan id 555 + ip link set dev $h3.555 master v$h3 + ip address add dev $h3.555 192.0.2.130/28 + ip address add dev $h3.555 2001:db8:2::2/64 + ip link set dev $h3.555 up +} + +cleanup() +{ + pre_cleanup + + ip link del dev $h3.555 + ip link del dev $swp3.555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_gretap() +{ + full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap" + full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh new file mode 100755 index 000000000000..5dbc7a08f4bd --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -0,0 +1,270 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing gretap. See +# mirror_gre_topo_lib.sh for more details. +# +# Test for "tc action mirred egress mirror" when the underlay route points at a +# vlan device on top of a bridge device with vlan filtering (802.1q). + +ALL_TESTS=" + test_gretap + test_ip6gretap + test_gretap_forbidden_cpu + test_ip6gretap_forbidden_cpu + test_gretap_forbidden_egress + test_ip6gretap_forbidden_egress + test_gretap_untagged_egress + test_ip6gretap_untagged_egress + test_gretap_fdb_roaming + test_ip6gretap_fdb_roaming + test_gretap_stp + test_ip6gretap_stp +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_gre_lib.sh +source mirror_gre_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_gre_topo_create + + vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128 + bridge vlan add dev br1 vid 555 self + ip route rep 192.0.2.130/32 dev br1.555 + ip -6 route rep 2001:db8:2::2/128 dev br1.555 + + vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64 + + ip link set dev $swp3 master br1 + bridge vlan add dev $swp3 vid 555 + bridge vlan add dev $swp2 vid 555 +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp3 nomaster + vlan_destroy $h3 555 + vlan_destroy br1 555 + + mirror_gre_topo_destroy + vrf_cleanup +} + +test_vlan_match() +{ + local tundev=$1; shift + local vlan_match=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what" + full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what" +} + +test_gretap() +{ + test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap" +} + +test_ip6gretap() +{ + test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap" +} + +test_span_gre_forbidden_cpu() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + # Run the pass-test first, to prime neighbor table. + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + # Now forbid the VLAN at the bridge and see it fail. + bridge vlan del dev br1 vid 555 self + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge vlan add dev br1 vid 555 self + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan forbidden at a bridge ($tcflags)" +} + +test_gretap_forbidden_cpu() +{ + test_span_gre_forbidden_cpu gt4 "mirror to gretap" +} + +test_ip6gretap_forbidden_cpu() +{ + test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap" +} + +test_span_gre_forbidden_egress() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + bridge vlan del dev $swp3 vid 555 + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge vlan add dev $swp3 vid 555 + # Re-prime FDB + arping -I br1.555 192.0.2.130 -fqc 1 + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan forbidden at a bridge egress ($tcflags)" +} + +test_gretap_forbidden_egress() +{ + test_span_gre_forbidden_egress gt4 "mirror to gretap" +} + +test_ip6gretap_forbidden_egress() +{ + test_span_gre_forbidden_egress gt6 "mirror to ip6gretap" +} + +test_span_gre_untagged_egress() +{ + local tundev=$1; shift + local what=$1; shift + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + + quick_test_span_gre_dir $tundev ingress + quick_test_span_vlan_dir $h3 555 ingress + + bridge vlan add dev $swp3 vid 555 pvid untagged + sleep 1 + quick_test_span_gre_dir $tundev ingress + fail_test_span_vlan_dir $h3 555 ingress + + bridge vlan add dev $swp3 vid 555 + sleep 1 + quick_test_span_gre_dir $tundev ingress + quick_test_span_vlan_dir $h3 555 ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: vlan untagged at a bridge egress ($tcflags)" +} + +test_gretap_untagged_egress() +{ + test_span_gre_untagged_egress gt4 "mirror to gretap" +} + +test_ip6gretap_untagged_egress() +{ + test_span_gre_untagged_egress gt6 "mirror to ip6gretap" +} + +test_span_gre_fdb_roaming() +{ + local tundev=$1; shift + local what=$1; shift + local h3mac=$(mac_get $h3) + + RET=0 + + mirror_install $swp1 ingress $tundev "matchall $tcflags" + quick_test_span_gre_dir $tundev ingress + + bridge fdb del dev $swp3 $h3mac vlan 555 master + bridge fdb add dev $swp2 $h3mac vlan 555 master + sleep 1 + fail_test_span_gre_dir $tundev ingress + + bridge fdb del dev $swp2 $h3mac vlan 555 master + # Re-prime FDB + arping -I br1.555 192.0.2.130 -fqc 1 + sleep 1 + quick_test_span_gre_dir $tundev ingress + + mirror_uninstall $swp1 ingress + + log_test "$what: MAC roaming ($tcflags)" +} + +test_gretap_fdb_roaming() +{ + test_span_gre_fdb_roaming gt4 "mirror to gretap" +} + +test_ip6gretap_fdb_roaming() +{ + test_span_gre_fdb_roaming gt6 "mirror to ip6gretap" +} + +test_gretap_stp() +{ + full_test_span_gre_stp gt4 $swp3 "mirror to gretap" +} + +test_ip6gretap_stp() +{ + full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + + tests_run + + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index e5028a5725e3..d36dc26c6c51 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -38,3 +38,95 @@ mirror_test() ((expect <= delta && delta <= expect + 2)) check_err $? "Expected to capture $expect packets, got $delta." } + +do_test_span_dir_ips() +{ + local expect=$1; shift + local dev=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + icmp_capture_install $dev + mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + icmp_capture_uninstall $dev +} + +quick_test_span_dir_ips() +{ + do_test_span_dir_ips 10 "$@" +} + +fail_test_span_dir_ips() +{ + do_test_span_dir_ips 0 "$@" +} + +test_span_dir_ips() +{ + local dev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + + icmp_capture_install $dev "type $forward_type" + mirror_test v$h1 $ip1 $ip2 $dev 100 10 + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" + mirror_test v$h2 $ip2 $ip1 $dev 100 10 + icmp_capture_uninstall $dev +} + +fail_test_span_dir() +{ + fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +test_span_dir() +{ + test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +do_test_span_vlan_dir_ips() +{ + local expect=$1; shift + local dev=$1; shift + local vid=$1; shift + local direction=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + # Install the capture as skip_hw to avoid double-counting of packets. + # The traffic is meant for local box anyway, so will be trapped to + # kernel. + vlan_capture_install $dev "skip_hw vlan_id $vid" + mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + vlan_capture_uninstall $dev +} + +quick_test_span_vlan_dir_ips() +{ + do_test_span_vlan_dir_ips 10 "$@" +} + +fail_test_span_vlan_dir_ips() +{ + do_test_span_vlan_dir_ips 0 "$@" +} + +quick_test_span_vlan_dir() +{ + quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 +} + +fail_test_span_vlan_dir() +{ + fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh new file mode 100644 index 000000000000..04979e5962e7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is the standard topology for testing mirroring. The tests that use it +# tweak it in one way or another--typically add more devices to the topology. +# +# +---------------------+ +---------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +-----|---------------+ +---------------|-----+ +# | | +# +-----|-------------------------------------------------------------|-----+ +# | SW o--> mirror | | +# | +---|-------------------------------------------------------------|---+ | +# | | + $swp1 BR $swp2 + | | +# | +---------------------------------------------------------------------+ | +# | | +# | + $swp3 | +# +-----|-------------------------------------------------------------------+ +# | +# +-----|-------------------------------------------------------------------+ +# | H3 + $h3 | +# | | +# +-------------------------------------------------------------------------+ + +mirror_topo_h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +mirror_topo_h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +mirror_topo_h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +mirror_topo_h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +mirror_topo_h3_create() +{ + simple_if_init $h3 + tc qdisc add dev $h3 clsact +} + +mirror_topo_h3_destroy() +{ + tc qdisc del dev $h3 clsact + simple_if_fini $h3 +} + +mirror_topo_switch_create() +{ + ip link set dev $swp3 up + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact +} + +mirror_topo_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br1 + + ip link set dev $swp3 down +} + +mirror_topo_create() +{ + mirror_topo_h1_create + mirror_topo_h2_create + mirror_topo_h3_create + + mirror_topo_switch_create +} + +mirror_topo_destroy() +{ + mirror_topo_switch_destroy + + mirror_topo_h3_destroy + mirror_topo_h2_destroy + mirror_topo_h1_destroy +} diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh new file mode 100755 index 000000000000..20b37a53657e --- /dev/null +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh +# for more details. +# +# Test for "tc action mirred egress mirror" that mirrors to a vlan device. + +ALL_TESTS=" + test_vlan + test_tagged_vlan +" + +NUM_NETIFS=6 +source lib.sh +source mirror_lib.sh +source mirror_topo_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + mirror_topo_create + + vlan_create $swp3 555 + + vlan_create $h3 555 v$h3 + matchall_sink_create $h3.555 + + vlan_create $h1 111 v$h1 192.0.2.17/28 + bridge vlan add dev $swp1 vid 111 + + vlan_create $h2 111 v$h2 192.0.2.18/28 + bridge vlan add dev $swp2 vid 111 +} + +cleanup() +{ + pre_cleanup + + vlan_destroy $h2 111 + vlan_destroy $h1 111 + vlan_destroy $h3 555 + vlan_destroy $swp3 555 + + mirror_topo_destroy + vrf_cleanup +} + +test_vlan_dir() +{ + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + RET=0 + + mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" + test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_uninstall $swp1 $direction + + log_test "$direction mirror to vlan ($tcflags)" +} + +test_vlan() +{ + test_vlan_dir ingress 8 0 + test_vlan_dir egress 0 8 +} + +test_tagged_vlan_dir() +{ + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + RET=0 + + mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" + do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \ + 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \ + 192.0.2.17 192.0.2.18 + mirror_uninstall $swp1 $direction + + log_test "$direction mirror to vlan ($tcflags)" +} + +test_tagged_vlan() +{ + test_tagged_vlan_dir ingress 8 0 + test_tagged_vlan_dir egress 0 8 +} + +test_all() +{ + slow_path_trap_install $swp1 ingress + slow_path_trap_install $swp1 egress + trap_install $h3 ingress + + tests_run + + trap_install $h3 ingress + slow_path_trap_uninstall $swp1 egress + slow_path_trap_uninstall $swp1 ingress +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tcflags="skip_hw" +test_all + +if ! tc_offload_check; then + echo "WARN: Could not test offloaded functionality" +else + tcflags="skip_sw" + test_all +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 365c32e84189..c9f478b40996 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,6 +23,8 @@ #include <unistd.h> #include <numa.h> +#include "../kselftest.h" + static const int PORT = 8888; static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) @@ -229,7 +231,7 @@ int main(void) int *rcv_fd, nodes; if (numa_available() < 0) - error(1, errno, "no numa api support"); + ksft_exit_skip("no numa api support\n"); nodes = numa_max_node() + 1; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 168c66d74fc5..e1473234968d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -134,11 +134,15 @@ struct seccomp_data { #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif #ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef PTRACE_SECCOMP_GET_METADATA @@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock) TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, - SECCOMP_FILTER_FLAG_LOG }; + SECCOMP_FILTER_FLAG_LOG, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; unsigned int flag, all_flags; int i; long ret; /* Test detection of known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile new file mode 100644 index 000000000000..f7baa9aa2932 --- /dev/null +++ b/tools/testing/selftests/uevent/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +all: + +include ../lib.mk + +.PHONY: all clean + +BINARIES := uevent_filtering +CFLAGS += -Wl,-no-as-needed -Wall + +uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h + $(CC) $(CFLAGS) $< -o $@ + +TEST_PROGS += $(BINARIES) +EXTRA_CLEAN := $(BINARIES) + +all: $(BINARIES) diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config new file mode 100644 index 000000000000..1038f4515be8 --- /dev/null +++ b/tools/testing/selftests/uevent/config @@ -0,0 +1,2 @@ +CONFIG_USER_NS=y +CONFIG_NET=y diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c new file mode 100644 index 000000000000..f83391aa42cf --- /dev/null +++ b/tools/testing/selftests/uevent/uevent_filtering.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/netlink.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sched.h> +#include <sys/eventfd.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent" +#define __UEVENT_BUFFER_SIZE (2048 * 2) +#define __UEVENT_HEADER "add@/devices/virtual/mem/full" +#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full") +#define __UEVENT_LISTEN_ALL -1 + +ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + +again: + ret = read(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (ret != pid) + goto again; + + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + + return 0; +} + +static int uevent_listener(unsigned long post_flags, bool expect_uevent, + int sync_fd) +{ + int sk_fd, ret; + socklen_t sk_addr_len; + int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE; + uint64_t sync_add = 1; + struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 }; + char buf[__UEVENT_BUFFER_SIZE] = { 0 }; + struct iovec iov = { buf, __UEVENT_BUFFER_SIZE }; + char control[CMSG_SPACE(sizeof(struct ucred))]; + struct msghdr hdr = { + &rcv_addr, sizeof(rcv_addr), &iov, 1, + control, sizeof(control), 0, + }; + + sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, + NETLINK_KOBJECT_UEVENT); + if (sk_fd < 0) { + fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno)); + return -1; + } + + ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, + sizeof(rcv_buf_sz)); + if (ret < 0) { + fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno)); + goto on_error; + } + + sk_addr.nl_family = AF_NETLINK; + sk_addr.nl_groups = __UEVENT_LISTEN_ALL; + + sk_addr_len = sizeof(sk_addr); + ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len); + if (ret < 0) { + fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno)); + goto on_error; + } + + ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len); + if (ret < 0) { + fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno)); + goto on_error; + } + + if ((size_t)sk_addr_len != sizeof(sk_addr)) { + fprintf(stderr, "Invalid socket address size\n"); + goto on_error; + } + + if (post_flags & CLONE_NEWUSER) { + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare user namespace\n", + strerror(errno)); + goto on_error; + } + } + + if (post_flags & CLONE_NEWNET) { + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare network namespace\n", + strerror(errno)); + goto on_error; + } + } + + ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add)); + close(sync_fd); + if (ret != sizeof(sync_add)) { + fprintf(stderr, "Failed to synchronize with parent process\n"); + goto on_error; + } + + fret = 0; + for (;;) { + ssize_t r; + + r = recvmsg(sk_fd, &hdr, 0); + if (r <= 0) { + fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno)); + ret = -1; + break; + } + + /* ignore libudev messages */ + if (memcmp(buf, "libudev", 8) == 0) + continue; + + /* ignore uevents we didn't trigger */ + if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0) + continue; + + if (!expect_uevent) { + fprintf(stderr, "Received unexpected uevent:\n"); + ret = -1; + } + + if (TH_LOG_ENABLED) { + /* If logging is enabled dump the received uevent. */ + (void)write_nointr(STDERR_FILENO, buf, r); + (void)write_nointr(STDERR_FILENO, "\n", 1); + } + + break; + } + +on_error: + close(sk_fd); + + return fret; +} + +int trigger_uevent(unsigned int times) +{ + int fd, ret; + unsigned int i; + + fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return -EINVAL; + + return -1; + } + + for (i = 0; i < times; i++) { + ret = write_nointr(fd, "add\n", sizeof("add\n") - 1); + if (ret < 0) { + fprintf(stderr, "Failed to trigger uevent\n"); + break; + } + } + close(fd); + + return ret; +} + +int set_death_signal(void) +{ + int ret; + pid_t ppid; + + ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + + /* Check whether we have been orphaned. */ + ppid = getppid(); + if (ppid == 1) { + pid_t self; + + self = getpid(); + ret = kill(self, SIGKILL); + } + + if (ret < 0) + return -1; + + return 0; +} + +static int do_test(unsigned long pre_flags, unsigned long post_flags, + bool expect_uevent, int sync_fd) +{ + int ret; + uint64_t wait_val; + pid_t pid; + sigset_t mask; + sigset_t orig_mask; + struct timespec timeout; + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + + ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask); + if (ret < 0) { + fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno)); + return -1; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno)); + return -1; + } + + if (pid == 0) { + /* Make sure that we go away when our parent dies. */ + ret = set_death_signal(); + if (ret < 0) { + fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n"); + _exit(EXIT_FAILURE); + } + + if (pre_flags & CLONE_NEWUSER) { + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare user namespace\n", + strerror(errno)); + _exit(EXIT_FAILURE); + } + } + + if (pre_flags & CLONE_NEWNET) { + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare network namespace\n", + strerror(errno)); + _exit(EXIT_FAILURE); + } + } + + if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val)); + if (ret != sizeof(wait_val)) { + fprintf(stderr, "Failed to synchronize with child process\n"); + _exit(EXIT_FAILURE); + } + + /* Trigger 10 uevents to account for the case where the kernel might + * drop some. + */ + ret = trigger_uevent(10); + if (ret < 0) + fprintf(stderr, "Failed triggering uevents\n"); + + /* Wait for 2 seconds before considering this failed. This should be + * plenty of time for the kernel to deliver the uevent even under heavy + * load. + */ + timeout.tv_sec = 2; + timeout.tv_nsec = 0; + +again: + ret = sigtimedwait(&mask, NULL, &timeout); + if (ret < 0) { + if (errno == EINTR) + goto again; + + if (!expect_uevent) + ret = kill(pid, SIGTERM); /* success */ + else + ret = kill(pid, SIGUSR1); /* error */ + if (ret < 0) + return -1; + } + + ret = wait_for_pid(pid); + if (ret < 0) + return -1; + + return ret; +} + +static void signal_handler(int sig) +{ + if (sig == SIGTERM) + _exit(EXIT_SUCCESS); + + _exit(EXIT_FAILURE); +} + +TEST(uevent_filtering) +{ + int ret, sync_fd; + struct sigaction act; + + if (geteuid()) { + TH_LOG("Uevent filtering tests require root privileges. Skipping test"); + _exit(KSFT_SKIP); + } + + ret = access(__DEV_FULL, F_OK); + EXPECT_EQ(0, ret) { + if (errno == ENOENT) { + TH_LOG(__DEV_FULL " does not exist. Skipping test"); + _exit(KSFT_SKIP); + } + + _exit(KSFT_FAIL); + } + + act.sa_handler = signal_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + ret = sigaction(SIGTERM, &act, NULL); + ASSERT_EQ(0, ret); + + sync_fd = eventfd(0, EFD_CLOEXEC); + ASSERT_GE(sync_fd, 0); + + /* + * Setup: + * - Open uevent listening socket in initial network namespace owned by + * initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in non-initial network namespace + * owned by initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(CLONE_NEWNET, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - unshare user namespace + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(CLONE_NEWUSER, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in non-initial network namespace + * owned by non-initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives no uevent + */ + ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare network namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWNET, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare user namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWUSER, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare user namespace + * - unshare network namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + +do_cleanup: + close(sync_fd); +} + +TEST_HARNESS_MAIN |