diff options
Diffstat (limited to 'tools')
151 files changed, 15029 insertions, 1291 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 89a048c2faec..28c4a502b419 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -331,6 +331,8 @@ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index edbe81534c6d..d07ccf8a23f7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -137,4 +137,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index f55a2daed59b..64b001b4f777 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -42,7 +42,8 @@ MAP COMMANDS | | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** -| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** } +| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** +| | **queue** | **stack** } DESCRIPTION =========== @@ -127,6 +128,10 @@ OPTIONS -f, --bpffs Show file names of pinned maps. + -n, --nomount + Do not automatically attempt to mount any virtual file system + (such as tracefs or BPF virtual file system) when necessary. + EXAMPLES ======== **# bpftool map show** @@ -169,6 +174,67 @@ The following three commands are equivalent: | **# bpftool map pin id 10 /sys/fs/bpf/map** | **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00** +Note that map update can also be used in order to change the program references +hold by a program array map. This can be used, for example, to change the +programs used for tail-call jumps at runtime, without having to reload the +entry-point program. Below is an example for this use case: we load a program +defining a prog array map, and with a main function that contains a tail call +to other programs that can be used either to "process" packets or to "debug" +processing. Note that the prog array map MUST be pinned into the BPF virtual +file system for the map update to work successfully, as kernel flushes prog +array maps when they have no more references from user space (and the update +would be lost as soon as bpftool exits). + +| +| **# bpftool prog loadall tail_calls.o /sys/fs/bpf/foo type xdp** +| **# bpftool prog --bpffs** + +:: + + 545: xdp name main_func tag 674b4b5597193dc3 gpl + loaded_at 2018-12-12T15:02:58+0000 uid 0 + xlated 240B jited 257B memlock 4096B map_ids 294 + pinned /sys/fs/bpf/foo/xdp + 546: xdp name bpf_func_process tag e369a529024751fc gpl + loaded_at 2018-12-12T15:02:58+0000 uid 0 + xlated 200B jited 164B memlock 4096B + pinned /sys/fs/bpf/foo/process + 547: xdp name bpf_func_debug tag 0b597868bc7f0976 gpl + loaded_at 2018-12-12T15:02:58+0000 uid 0 + xlated 200B jited 164B memlock 4096B + pinned /sys/fs/bpf/foo/debug + +**# bpftool map** + +:: + + 294: prog_array name jmp_table flags 0x0 + key 4B value 4B max_entries 1 memlock 4096B + owner_prog_type xdp owner jited + +| +| **# bpftool map pin id 294 /sys/fs/bpf/bar** +| **# bpftool map dump pinned /sys/fs/bpf/bar** + +:: + + Found 0 elements + +| +| **# bpftool map update pinned /sys/fs/bpf/bar key 0 0 0 0 value pinned /sys/fs/bpf/foo/debug** +| **# bpftool map dump pinned /sys/fs/bpf/bar** + +:: + + key: 00 00 00 00 value: 22 02 00 00 + Found 1 element + SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 408ec30d8872..ed87c9b619ad 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -136,4 +136,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index e3eb0eab7641..f4c5e5538bb8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -78,4 +78,10 @@ EXAMPLES SEE ALSO ======== - **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index ac4e904b10fb..58c8369b77dd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -15,18 +15,20 @@ SYNOPSIS *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } *COMMANDS* := - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** + | **loadall** | **help** } MAP COMMANDS ============= | **bpftool** **prog { show | list }** [*PROG*] -| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}] -| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] +| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}] +| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}] | **bpftool** **prog pin** *PROG* *FILE* -| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] -| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* *MAP* -| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* *MAP* +| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] +| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] +| **bpftool** **prog tracelog** | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } @@ -39,7 +41,9 @@ MAP COMMANDS | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | } -| *ATTACH_TYPE* := { **msg_verdict** | **skb_verdict** | **skb_parse** } +| *ATTACH_TYPE* := { +| **msg_verdict** | **skb_verdict** | **skb_parse** | **flow_dissector** +| } DESCRIPTION @@ -52,7 +56,7 @@ DESCRIPTION Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). - **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }] + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the program from the kernel. By default, eBPF will be disassembled and printed to standard output in human-readable format. In this case, **opcodes** @@ -65,13 +69,23 @@ DESCRIPTION built instead, and eBPF instructions will be presented with CFG in DOT format, on standard output. - **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }] + If the prog has line_info available, the source line will + be displayed by default. If **linum** is specified, + the filename, line number and line column will also be + displayed on top of the source line. + + **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. **opcodes** controls if raw opcodes will be printed. + If the prog has line_info available, the source line will + be displayed by default. If **linum** is specified, + the filename, line number and line column will also be + displayed on top of the source line. + **bpftool prog pin** *PROG* *FILE* Pin program *PROG* as *FILE*. @@ -79,8 +93,11 @@ DESCRIPTION contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] - Load bpf program from binary *OBJ* and pin as *FILE*. + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] + Load bpf program(s) from binary *OBJ* and pin as *PATH*. + **bpftool prog load** pins only the first program from the + *OBJ* as *PATH*. **bpftool prog loadall** pins all programs + from the *OBJ* under *PATH* directory. **type** is optional, if not specified program type will be inferred from section names. By default bpftool will create new maps as declared in the ELF @@ -92,18 +109,32 @@ DESCRIPTION use, referring to it by **id** or through a **pinned** file. If **dev** *NAME* is specified program will be loaded onto given networking device (offload). + Optional **pinmaps** argument can be provided to pin all + maps under *MAP_DIR* directory. - Note: *FILE* must be located in *bpffs* mount. It must not + Note: *PATH* must be located in *bpffs* mount. It must not contain a dot character ('.'), which is reserved for future extensions of *bpffs*. - **bpftool prog attach** *PROG* *ATTACH_TYPE* *MAP* - Attach bpf program *PROG* (with type specified by *ATTACH_TYPE*) - to the map *MAP*. - - **bpftool prog detach** *PROG* *ATTACH_TYPE* *MAP* - Detach bpf program *PROG* (with type specified by *ATTACH_TYPE*) - from the map *MAP*. + **bpftool prog attach** *PROG* *ATTACH_TYPE* [*MAP*] + Attach bpf program *PROG* (with type specified by + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* + parameter, with the exception of *flow_dissector* which is + attached to current networking name space. + + **bpftool prog detach** *PROG* *ATTACH_TYPE* [*MAP*] + Detach bpf program *PROG* (with type specified by + *ATTACH_TYPE*). Most *ATTACH_TYPEs* require a *MAP* + parameter, with the exception of *flow_dissector* which is + detached from the current networking name space. + + **bpftool prog tracelog** + Dump the trace pipe of the system to the console (stdout). + Hit <Ctrl+C> to stop printing. BPF programs can write to this + trace pipe at runtime with the **bpf_trace_printk()** helper. + This should be used only for debugging purposes. For + streaming data from BPF programs to user space, one can use + perf events (see also **bpftool-map**\ (8)). **bpftool prog help** Print short help message. @@ -124,86 +155,108 @@ OPTIONS Generate human-readable JSON output. Implies **-j**. -f, --bpffs - Show file names of pinned programs. + When showing BPF programs, show file names of pinned + programs. + + -m, --mapcompat + Allow loading maps with unknown map definitions. + + -n, --nomount + Do not automatically attempt to mount any virtual file system + (such as tracefs or BPF virtual file system) when necessary. EXAMPLES ======== **# bpftool prog show** + :: - 10: xdp name some_prog tag 005a3d2123620c8b gpl - loaded_at Sep 29/20:11 uid 0 - xlated 528B jited 370B memlock 4096B map_ids 10 + 10: xdp name some_prog tag 005a3d2123620c8b gpl + loaded_at 2017-09-29T20:11:00+0000 uid 0 + xlated 528B jited 370B memlock 4096B map_ids 10 **# bpftool --json --pretty prog show** :: - { - "programs": [{ - "id": 10, - "type": "xdp", - "tag": "005a3d2123620c8b", - "gpl_compatible": true, - "loaded_at": "Sep 29/20:11", - "uid": 0, - "bytes_xlated": 528, - "jited": true, - "bytes_jited": 370, - "bytes_memlock": 4096, - "map_ids": [10 - ] - } - ] - } + [{ + "id": 10, + "type": "xdp", + "tag": "005a3d2123620c8b", + "gpl_compatible": true, + "loaded_at": 1506715860, + "uid": 0, + "bytes_xlated": 528, + "jited": true, + "bytes_jited": 370, + "bytes_memlock": 4096, + "map_ids": [10 + ] + } + ] | | **# bpftool prog dump xlated id 10 file /tmp/t** | **# ls -l /tmp/t** -| -rw------- 1 root root 560 Jul 22 01:42 /tmp/t -**# bpftool prog dum jited tag 005a3d2123620c8b** +:: + + -rw------- 1 root root 560 Jul 22 01:42 /tmp/t + +**# bpftool prog dump jited tag 005a3d2123620c8b** :: - push %rbp - mov %rsp,%rbp - sub $0x228,%rsp - sub $0x28,%rbp - mov %rbx,0x0(%rbp) + 0: push %rbp + 1: mov %rsp,%rbp + 2: sub $0x228,%rsp + 3: sub $0x28,%rbp + 4: mov %rbx,0x0(%rbp) | | **# mount -t bpf none /sys/fs/bpf/** | **# bpftool prog pin id 10 /sys/fs/bpf/prog** | **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2** | **# ls -l /sys/fs/bpf/** -| -rw------- 1 root root 0 Jul 22 01:43 prog -| -rw------- 1 root root 0 Jul 22 01:44 prog2 -**# bpftool prog dum jited pinned /sys/fs/bpf/prog opcodes** +:: + + -rw------- 1 root root 0 Jul 22 01:43 prog + -rw------- 1 root root 0 Jul 22 01:44 prog2 + +**# bpftool prog dump jited pinned /sys/fs/bpf/prog opcodes** :: - push %rbp - 55 - mov %rsp,%rbp - 48 89 e5 - sub $0x228,%rsp - 48 81 ec 28 02 00 00 - sub $0x28,%rbp - 48 83 ed 28 - mov %rbx,0x0(%rbp) - 48 89 5d 00 + 0: push %rbp + 55 + 1: mov %rsp,%rbp + 48 89 e5 + 4: sub $0x228,%rsp + 48 81 ec 28 02 00 00 + b: sub $0x28,%rbp + 48 83 ed 28 + f: mov %rbx,0x0(%rbp) + 48 89 5d 00 | | **# bpftool prog load xdp1_kern.o /sys/fs/bpf/xdp1 type xdp map name rxcnt id 7** | **# bpftool prog show pinned /sys/fs/bpf/xdp1** -| 9: xdp name xdp_prog1 tag 539ec6ce11b52f98 gpl -| loaded_at 2018-06-25T16:17:31-0700 uid 0 -| xlated 488B jited 336B memlock 4096B map_ids 7 -| **# rm /sys/fs/bpf/xdp1** -| + +:: + + 9: xdp name xdp_prog1 tag 539ec6ce11b52f98 gpl + loaded_at 2018-06-25T16:17:31-0700 uid 0 + xlated 488B jited 336B memlock 4096B map_ids 7 + +**# rm /sys/fs/bpf/xdp1** SEE ALSO ======== - **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 04cd4f92ab89..e1677e81ed59 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -60,8 +60,17 @@ OPTIONS -m, --mapcompat Allow loading maps with unknown map definitions. + -n, --nomount + Do not automatically attempt to mount any virtual file system + (such as tracefs or BPF virtual file system) when necessary. + SEE ALSO ======== - **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) - **bpftool-perf**\ (8), **bpftool-net**\ (8) + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool-prog**\ (8), + **bpftool-map**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dac7eff4c7e5..492f0f24e2d3 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -35,8 +35,6 @@ $(LIBBPF)-clean: prefix ?= /usr/local bash_compdir ?= /usr/share/bash-completion/completions -CC = gcc - CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ @@ -53,7 +51,7 @@ ifneq ($(EXTRA_LDFLAGS),) LDFLAGS += $(EXTRA_LDFLAGS) endif -LIBS = -lelf -lbfd -lopcodes $(LIBBPF) +LIBS = -lelf $(LIBBPF) INSTALL ?= install RM ?= rm -f @@ -90,7 +88,16 @@ include $(wildcard $(OUTPUT)*.d) all: $(OUTPUT)bpftool -SRCS = $(wildcard *.c) +BFD_SRCS = jit_disasm.c + +SRCS = $(filter-out $(BFD_SRCS),$(wildcard *.c)) + +ifeq ($(feature-libbfd),1) +CFLAGS += -DHAVE_LIBBFD_SUPPORT +SRCS += $(BFD_SRCS) +LIBS += -lbfd -lopcodes +endif + OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 3f78e6404589..e4e4fab1b8c7 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -1,37 +1,8 @@ # bpftool(8) bash completion -*- shell-script -*- # +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) # Copyright (C) 2017-2018 Netronome Systems, Inc. # -# This software is dual licensed under the GNU General License -# Version 2, June 1991 as shown in the file COPYING in the top-level -# directory of this source tree or the BSD 2-Clause License provided -# below. You have the option to license this software under the -# complete terms of either license. -# -# The BSD 2-Clause License: -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# 1. Redistributions of source code must retain the above -# copyright notice, this list of conditions and the following -# disclaimer. -# -# 2. Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. -# # Author: Quentin Monnet <quentin.monnet@netronome.com> # Takes a list of words in argument; each one of them is added to COMPREPLY if @@ -191,7 +162,7 @@ _bpftool() # Deal with simplest keywords case $prev in - help|hex|opcodes|visual) + help|hex|opcodes|visual|linum) return 0 ;; tag) @@ -243,16 +214,20 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - if [[ $command != "load" ]]; then - case $prev in - id) - _bpftool_get_prog_ids - return 0 - ;; - esac - fi + # Complete id, only for subcommands that use prog (but no map) ids + case $command in + show|list|dump|pin) + case $prev in + id) + _bpftool_get_prog_ids + return 0 + ;; + esac + ;; + esac local PROG_TYPE='id pinned tag' + local MAP_TYPE='id pinned' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -274,10 +249,10 @@ _bpftool() *) _bpftool_once_attr 'file' if _bpftool_search_list 'xlated'; then - COMPREPLY+=( $( compgen -W 'opcodes visual' -- \ + COMPREPLY+=( $( compgen -W 'opcodes visual linum' -- \ "$cur" ) ) else - COMPREPLY+=( $( compgen -W 'opcodes' -- \ + COMPREPLY+=( $( compgen -W 'opcodes linum' -- \ "$cur" ) ) fi return 0 @@ -293,23 +268,45 @@ _bpftool() return 0 ;; attach|detach) - if [[ ${#words[@]} == 7 ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) - return 0 - fi - - if [[ ${#words[@]} == 6 ]]; then - COMPREPLY=( $( compgen -W "msg_verdict skb_verdict skb_parse" -- "$cur" ) ) - return 0 - fi - - if [[ $prev == "$command" ]]; then - COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) ) - return 0 - fi - return 0 + case $cword in + 3) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) + return 0 + ;; + 4) + case $prev in + id) + _bpftool_get_prog_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + 5) + COMPREPLY=( $( compgen -W 'msg_verdict skb_verdict \ + skb_parse flow_dissector' -- "$cur" ) ) + return 0 + ;; + 6) + COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) + return 0 + ;; + 7) + case $prev in + id) + _bpftool_get_map_ids + ;; + pinned) + _filedir + ;; + esac + return 0 + ;; + esac ;; - load) + load|loadall) local obj if [[ ${#words[@]} -lt 6 ]]; then @@ -338,7 +335,16 @@ _bpftool() case $prev in type) - COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \ + COMPREPLY=( $( compgen -W "socket kprobe \ + kretprobe classifier flow_dissector \ + action tracepoint raw_tracepoint \ + xdp perf_event cgroup/skb cgroup/sock \ + cgroup/dev lwt_in lwt_out lwt_xmit \ + lwt_seg6local sockops sk_skb sk_msg \ + lirc_mode2 cgroup/bind4 cgroup/bind6 \ + cgroup/connect4 cgroup/connect6 \ + cgroup/sendmsg4 cgroup/sendmsg6 \ + cgroup/post_bind4 cgroup/post_bind6" -- \ "$cur" ) ) return 0 ;; @@ -346,7 +352,7 @@ _bpftool() _bpftool_get_map_ids return 0 ;; - pinned) + pinned|pinmaps) _filedir return 0 ;; @@ -358,14 +364,18 @@ _bpftool() COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) _bpftool_once_attr 'type' _bpftool_once_attr 'dev' + _bpftool_once_attr 'pinmaps' return 0 ;; esac ;; + tracelog) + return 0 + ;; *) [[ $prev == $object ]] && \ COMPREPLY=( $( compgen -W 'dump help pin attach detach load \ - show list' -- "$cur" ) ) + show list tracelog' -- "$cur" ) ) ;; esac ;; @@ -400,7 +410,7 @@ _bpftool() lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap sockmap cpumap xskmap \ sockhash cgroup_storage reuseport_sockarray \ - percpu_cgroup_storage' -- \ + percpu_cgroup_storage queue stack' -- \ "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 55bc512a1831..3f0629edbca5 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ #include <ctype.h> @@ -32,7 +32,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw, } static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, - const void *data) + __u8 bit_offset, const void *data) { int actual_type_id; @@ -40,7 +40,7 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id, if (actual_type_id < 0) return actual_type_id; - return btf_dumper_do_type(d, actual_type_id, 0, data); + return btf_dumper_do_type(d, actual_type_id, bit_offset, data); } static void btf_dumper_enum(const void *data, json_writer_t *jw) @@ -73,20 +73,17 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id, return ret; } -static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, +static void btf_dumper_bitfield(__u32 nr_bits, __u8 bit_offset, const void *data, json_writer_t *jw, bool is_plain_text) { int left_shift_bits, right_shift_bits; - int nr_bits = BTF_INT_BITS(int_type); - int total_bits_offset; int bytes_to_copy; int bits_to_copy; __u64 print_num; - total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); - data += BITS_ROUNDDOWN_BYTES(total_bits_offset); - bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + data += BITS_ROUNDDOWN_BYTES(bit_offset); + bit_offset = BITS_PER_BYTE_MASKED(bit_offset); bits_to_copy = bit_offset + nr_bits; bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy); @@ -109,6 +106,22 @@ static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, jsonw_printf(jw, "%llu", print_num); } + +static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset, + const void *data, json_writer_t *jw, + bool is_plain_text) +{ + int nr_bits = BTF_INT_BITS(int_type); + int total_bits_offset; + + /* bits_offset is at most 7. + * BTF_INT_OFFSET() cannot exceed 64 bits. + */ + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type); + btf_dumper_bitfield(nr_bits, total_bits_offset, data, jw, + is_plain_text); +} + static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset, const void *data, json_writer_t *jw, bool is_plain_text) @@ -180,6 +193,7 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, const struct btf_type *t; struct btf_member *m; const void *data_off; + int kind_flag; int ret = 0; int i, vlen; @@ -187,18 +201,32 @@ static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id, if (!t) return -EINVAL; + kind_flag = BTF_INFO_KFLAG(t->info); vlen = BTF_INFO_VLEN(t->info); jsonw_start_object(d->jw); m = (struct btf_member *)(t + 1); for (i = 0; i < vlen; i++) { - data_off = data + BITS_ROUNDDOWN_BYTES(m[i].offset); + __u32 bit_offset = m[i].offset; + __u32 bitfield_size = 0; + + if (kind_flag) { + bitfield_size = BTF_MEMBER_BITFIELD_SIZE(bit_offset); + bit_offset = BTF_MEMBER_BIT_OFFSET(bit_offset); + } + jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off)); - ret = btf_dumper_do_type(d, m[i].type, - BITS_PER_BYTE_MASKED(m[i].offset), - data_off); - if (ret) - break; + if (bitfield_size) { + btf_dumper_bitfield(bitfield_size, bit_offset, + data, d->jw, d->is_plain_text); + } else { + data_off = data + BITS_ROUNDDOWN_BYTES(bit_offset); + ret = btf_dumper_do_type(d, m[i].type, + BITS_PER_BYTE_MASKED(bit_offset), + data_off); + if (ret) + break; + } } jsonw_end_object(d->jw); @@ -237,7 +265,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id, case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: - return btf_dumper_modifier(d, type_id, data); + return btf_dumper_modifier(d, type_id, bit_offset, data); default: jsonw_printf(d->jw, "(unsupported-kind"); return -EINVAL; @@ -249,3 +277,206 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, { return btf_dumper_do_type(d, type_id, 0, data); } + +#define BTF_PRINT_ARG(...) \ + do { \ + pos += snprintf(func_sig + pos, size - pos, \ + __VA_ARGS__); \ + if (pos >= size) \ + return -1; \ + } while (0) +#define BTF_PRINT_TYPE(type) \ + do { \ + pos = __btf_dumper_type_only(btf, type, func_sig, \ + pos, size); \ + if (pos == -1) \ + return -1; \ + } while (0) + +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size); + +static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id, + char *func_sig, int pos, int size) +{ + const struct btf_type *proto_type; + const struct btf_array *array; + const struct btf_type *t; + + if (!type_id) { + BTF_PRINT_ARG("void "); + return pos; + } + + t = btf__type_by_id(btf, type_id); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_TYPEDEF: + BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_STRUCT: + BTF_PRINT_ARG("struct %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_UNION: + BTF_PRINT_ARG("union %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_ENUM: + BTF_PRINT_ARG("enum %s ", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_ARRAY: + array = (struct btf_array *)(t + 1); + BTF_PRINT_TYPE(array->type); + BTF_PRINT_ARG("[%d]", array->nelems); + break; + case BTF_KIND_PTR: + BTF_PRINT_TYPE(t->type); + BTF_PRINT_ARG("* "); + break; + case BTF_KIND_FWD: + BTF_PRINT_ARG("%s %s ", + BTF_INFO_KFLAG(t->info) ? "union" : "struct", + btf__name_by_offset(btf, t->name_off)); + break; + case BTF_KIND_VOLATILE: + BTF_PRINT_ARG("volatile "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_CONST: + BTF_PRINT_ARG("const "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_RESTRICT: + BTF_PRINT_ARG("restrict "); + BTF_PRINT_TYPE(t->type); + break; + case BTF_KIND_FUNC_PROTO: + pos = btf_dump_func(btf, func_sig, t, NULL, pos, size); + if (pos == -1) + return -1; + break; + case BTF_KIND_FUNC: + proto_type = btf__type_by_id(btf, t->type); + pos = btf_dump_func(btf, func_sig, proto_type, t, pos, size); + if (pos == -1) + return -1; + break; + case BTF_KIND_UNKN: + default: + return -1; + } + + return pos; +} + +static int btf_dump_func(const struct btf *btf, char *func_sig, + const struct btf_type *func_proto, + const struct btf_type *func, int pos, int size) +{ + int i, vlen; + + BTF_PRINT_TYPE(func_proto->type); + if (func) + BTF_PRINT_ARG("%s(", btf__name_by_offset(btf, func->name_off)); + else + BTF_PRINT_ARG("("); + vlen = BTF_INFO_VLEN(func_proto->info); + for (i = 0; i < vlen; i++) { + struct btf_param *arg = &((struct btf_param *)(func_proto + 1))[i]; + + if (i) + BTF_PRINT_ARG(", "); + if (arg->type) { + BTF_PRINT_TYPE(arg->type); + BTF_PRINT_ARG("%s", + btf__name_by_offset(btf, arg->name_off)); + } else { + BTF_PRINT_ARG("..."); + } + } + BTF_PRINT_ARG(")"); + + return pos; +} + +void btf_dumper_type_only(const struct btf *btf, __u32 type_id, char *func_sig, + int size) +{ + int err; + + func_sig[0] = '\0'; + if (!btf) + return; + + err = __btf_dumper_type_only(btf, type_id, func_sig, 0, size); + if (err < 0) + func_sig[0] = '\0'; +} + +static const char *ltrim(const char *s) +{ + while (isspace(*s)) + s++; + + return s; +} + +void btf_dump_linfo_plain(const struct btf *btf, + const struct bpf_line_info *linfo, + const char *prefix, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (!line) + return; + line = ltrim(line); + + if (!prefix) + prefix = ""; + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + + /* More forgiving on file because linum option is + * expected to provide more info than the already + * available src line. + */ + if (!file) + file = ""; + + printf("%s%s [file:%s line_num:%u line_col:%u]\n", + prefix, line, file, + BPF_LINE_INFO_LINE_NUM(linfo->line_col), + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } else { + printf("%s%s\n", prefix, line); + } +} + +void btf_dump_linfo_json(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum) +{ + const char *line = btf__name_by_offset(btf, linfo->line_off); + + if (line) + jsonw_string_field(json_wtr, "src", ltrim(line)); + + if (linum) { + const char *file = btf__name_by_offset(btf, linfo->file_name_off); + + if (file) + jsonw_string_field(json_wtr, "file", file); + + if (BPF_LINE_INFO_LINE_NUM(linfo->line_col)) + jsonw_int_field(json_wtr, "line_num", + BPF_LINE_INFO_LINE_NUM(linfo->line_col)); + + if (BPF_LINE_INFO_LINE_COL(linfo->line_col)) + jsonw_int_field(json_wtr, "line_col", + BPF_LINE_INFO_LINE_COL(linfo->line_col)); + } +} diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c index f30b3a4a840b..31f0db41513f 100644 --- a/tools/bpf/bpftool/cfg.c +++ b/tools/bpf/bpftool/cfg.c @@ -1,39 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #include <linux/list.h> #include <stdlib.h> diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h index 2cc9bd990b13..e144257ea6d2 100644 --- a/tools/bpf/bpftool/cfg.h +++ b/tools/bpf/bpftool/cfg.h @@ -1,39 +1,5 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #ifndef __BPF_TOOL_CFG_H #define __BPF_TOOL_CFG_H diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index ee7a9765c6b3..4b5c8da2a7c0 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2017 Facebook // Author: Roman Gushchin <guro@fb.com> diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 25af85304ebe..897483457bf0 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #include <ctype.h> #include <errno.h> @@ -46,8 +16,8 @@ #include <linux/magic.h> #include <net/if.h> #include <sys/mount.h> +#include <sys/resource.h> #include <sys/stat.h> -#include <sys/types.h> #include <sys/vfs.h> #include <bpf.h> @@ -58,7 +28,7 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif -void p_err(const char *fmt, ...) +void __printf(1, 2) p_err(const char *fmt, ...) { va_list ap; @@ -76,7 +46,7 @@ void p_err(const char *fmt, ...) va_end(ap); } -void p_info(const char *fmt, ...) +void __printf(1, 2) p_info(const char *fmt, ...) { va_list ap; @@ -99,7 +69,15 @@ static bool is_bpffs(char *path) return (unsigned long)st_fs.f_type == BPF_FS_MAGIC; } -static int mnt_bpffs(const char *target, char *buff, size_t bufflen) +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + +static int +mnt_fs(const char *target, const char *type, char *buff, size_t bufflen) { bool bind_done = false; @@ -121,25 +99,40 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen) bind_done = true; } - if (mount("bpf", target, "bpf", 0, "mode=0700")) { - snprintf(buff, bufflen, "mount -t bpf bpf %s failed: %s", - target, strerror(errno)); + if (mount(type, target, type, 0, "mode=0700")) { + snprintf(buff, bufflen, "mount -t %s %s %s failed: %s", + type, type, target, strerror(errno)); return -1; } return 0; } -int open_obj_pinned(char *path) +int mount_tracefs(const char *target) +{ + char err_str[ERR_MAX_LEN]; + int err; + + err = mnt_fs(target, "tracefs", err_str, ERR_MAX_LEN); + if (err) { + err_str[ERR_MAX_LEN - 1] = '\0'; + p_err("can't mount tracefs: %s", err_str); + } + + return err; +} + +int open_obj_pinned(char *path, bool quiet) { int fd; fd = bpf_obj_get(path); if (fd < 0) { - p_err("bpf obj get (%s): %s", path, - errno == EACCES && !is_bpffs(dirname(path)) ? - "directory not in bpf file system (bpffs)" : - strerror(errno)); + if (!quiet) + p_err("bpf obj get (%s): %s", path, + errno == EACCES && !is_bpffs(dirname(path)) ? + "directory not in bpf file system (bpffs)" : + strerror(errno)); return -1; } @@ -151,7 +144,7 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) enum bpf_obj_type type; int fd; - fd = open_obj_pinned(path); + fd = open_obj_pinned(path, false); if (fd < 0) return -1; @@ -169,34 +162,29 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type) return fd; } -int do_pin_fd(int fd, const char *name) +int mount_bpffs_for_pin(const char *name) { char err_str[ERR_MAX_LEN]; char *file; char *dir; int err = 0; - err = bpf_obj_pin(fd, name); - if (!err) - goto out; - file = malloc(strlen(name) + 1); strcpy(file, name); dir = dirname(file); - if (errno != EPERM || is_bpffs(dir)) { - p_err("can't pin the object (%s): %s", name, strerror(errno)); + if (is_bpffs(dir)) + /* nothing to do if already mounted */ + goto out_free; + + if (block_mount) { + p_err("no BPF file system found, not mounting it due to --nomount option"); + err = -1; goto out_free; } - /* Attempt to mount bpffs, then retry pinning. */ - err = mnt_bpffs(dir, err_str, ERR_MAX_LEN); - if (!err) { - err = bpf_obj_pin(fd, name); - if (err) - p_err("can't pin the object (%s): %s", name, - strerror(errno)); - } else { + err = mnt_fs(dir, "bpf", err_str, ERR_MAX_LEN); + if (err) { err_str[ERR_MAX_LEN - 1] = '\0'; p_err("can't mount BPF file system to pin the object (%s): %s", name, err_str); @@ -204,10 +192,20 @@ int do_pin_fd(int fd, const char *name) out_free: free(file); -out: return err; } +int do_pin_fd(int fd, const char *name) +{ + int err; + + err = mount_bpffs_for_pin(name); + if (err) + return err; + + return bpf_obj_pin(fd, name); +} + int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)) { unsigned int id; @@ -268,7 +266,7 @@ int get_fd_type(int fd) char buf[512]; ssize_t n; - snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd); + snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); n = readlink(path, buf, sizeof(buf)); if (n < 0) { @@ -296,7 +294,7 @@ char *get_fdinfo(int fd, const char *key) ssize_t n; FILE *fdi; - snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd); + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); fdi = fopen(path, "r"); if (!fdi) { @@ -304,7 +302,7 @@ char *get_fdinfo(int fd, const char *key) return NULL; } - while ((n = getline(&line, &line_n, fdi))) { + while ((n = getline(&line, &line_n, fdi)) > 0) { char *value; int len; @@ -384,7 +382,7 @@ int build_pinned_obj_table(struct pinned_obj_table *tab, while ((ftse = fts_read(fts))) { if (!(ftse->fts_info & FTS_F)) continue; - fd = open_obj_pinned(ftse->fts_path); + fd = open_obj_pinned(ftse->fts_path, true); if (fd < 0) continue; @@ -597,7 +595,7 @@ void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode) if (!ifindex) return; - printf(" dev "); + printf(" offloaded_to "); if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name)) printf("%s", name); else diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index c75ffd9ce2bb..3ef3093560ba 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* * Based on: * @@ -19,29 +20,21 @@ #include <string.h> #include <bfd.h> #include <dis-asm.h> -#include <sys/types.h> #include <sys/stat.h> #include <limits.h> +#include <libbpf.h> #include "json_writer.h" #include "main.h" static void get_exec_path(char *tpath, size_t size) { + const char *path = "/proc/self/exe"; ssize_t len; - char *path; - - snprintf(tpath, size, "/proc/%d/exe", (int) getpid()); - tpath[size - 1] = 0; - - path = strdup(tpath); - assert(path); len = readlink(path, tpath, size - 1); assert(len > 0); tpath[len] = 0; - - free(path); } static int oper_count; @@ -77,10 +70,16 @@ static int fprintf_json(void *out, const char *fmt, ...) } void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options) + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) { + const struct bpf_line_info *linfo = NULL; disassembler_ftype disassemble; struct disassemble_info info; + unsigned int nr_skip = 0; int count, i, pc = 0; char tpath[PATH_MAX]; bfd *bfdf; @@ -109,7 +108,7 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (inf) { bfdf->arch_info = inf; } else { - p_err("No libfd support for %s", arch); + p_err("No libbfd support for %s", arch); return; } } @@ -136,12 +135,26 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (json_output) jsonw_start_array(json_wtr); do { + if (prog_linfo) { + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + func_ksym + pc, + func_idx, + nr_skip); + if (linfo) + nr_skip++; + } + if (json_output) { jsonw_start_object(json_wtr); oper_count = 0; + if (linfo) + btf_dump_linfo_json(btf, linfo, linum); jsonw_name(json_wtr, "pc"); jsonw_printf(json_wtr, "\"0x%x\"", pc); } else { + if (linfo) + btf_dump_linfo_plain(btf, linfo, "; ", + linum); printf("%4x:\t", pc); } @@ -183,3 +196,9 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, bfd_close(bfdf); } + +int disasm_init(void) +{ + bfd_init(); + return 0; +} diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c index c6eef76322ae..bff7ee026680 100644 --- a/tools/bpf/bpftool/json_writer.c +++ b/tools/bpf/bpftool/json_writer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* * Simple streaming JSON writer * @@ -19,6 +20,7 @@ #include <malloc.h> #include <inttypes.h> #include <stdint.h> +#include <linux/compiler.h> #include "json_writer.h" @@ -156,7 +158,8 @@ void jsonw_name(json_writer_t *self, const char *name) putc(' ', self->out); } -void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) +void __printf(2, 0) +jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) { jsonw_eor(self); putc('"', self->out); @@ -164,7 +167,7 @@ void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap) putc('"', self->out); } -void jsonw_printf(json_writer_t *self, const char *fmt, ...) +void __printf(2, 3) jsonw_printf(json_writer_t *self, const char *fmt, ...) { va_list ap; diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h index 0fa2fb1b6351..c1ab51aed99c 100644 --- a/tools/bpf/bpftool/json_writer.h +++ b/tools/bpf/bpftool/json_writer.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* * Simple streaming JSON writer * diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 75a3296dc0bc..f44a1c2c4ea0 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -1,37 +1,6 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <bfd.h> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ + #include <ctype.h> #include <errno.h> #include <getopt.h> @@ -55,6 +24,7 @@ json_writer_t *json_wtr; bool pretty_output; bool json_output; bool show_pinned; +bool block_mount; int bpf_flags; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -344,6 +314,7 @@ int main(int argc, char **argv) { "version", no_argument, NULL, 'V' }, { "bpffs", no_argument, NULL, 'f' }, { "mapcompat", no_argument, NULL, 'm' }, + { "nomount", no_argument, NULL, 'n' }, { 0 } }; int opt, ret; @@ -352,13 +323,14 @@ int main(int argc, char **argv) pretty_output = false; json_output = false; show_pinned = false; + block_mount = false; bin_name = argv[0]; hash_init(prog_table.table); hash_init(map_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "Vhpjfm", + while ((opt = getopt_long(argc, argv, "Vhpjfmn", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -385,6 +357,9 @@ int main(int argc, char **argv) case 'm': bpf_flags = MAPS_RELAX_COMPAT; break; + case 'n': + block_mount = true; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -399,8 +374,6 @@ int main(int argc, char **argv) if (argc < 0) usage(); - bfd_init(); - ret = cmd_select(cmds, argc, argv, do_help); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 28322ace2856..052c91d4dc55 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #ifndef __BPF_TOOL_H #define __BPF_TOOL_H @@ -74,10 +44,37 @@ #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} | {-m|--mapcompat}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ + "\t {-m|--mapcompat} | {-n|--nomount} }" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE }" +static const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", +}; + enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, @@ -89,6 +86,7 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; +extern bool block_mount; extern int bpf_flags; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; @@ -100,6 +98,10 @@ bool is_prefix(const char *pfx, const char *str); void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep); void usage(void) __noreturn; +void set_max_rlimit(void); + +int mount_tracefs(const char *target); + struct pinned_obj_table { DECLARE_HASHTABLE(table, 16); }; @@ -110,6 +112,9 @@ struct pinned_obj { struct hlist_node hash; }; +struct btf; +struct bpf_line_info; + int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); @@ -127,8 +132,9 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); -int open_obj_pinned(char *path); +int open_obj_pinned(char *path, bool quiet); int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type); +int mount_bpffs_for_pin(const char *name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32)); int do_pin_fd(int fd, const char *name); @@ -138,14 +144,38 @@ int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); int do_net(int argc, char **arg); +int do_tracelog(int argc, char **arg); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); +struct bpf_prog_linfo; +#ifdef HAVE_LIBBFD_SUPPORT +void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum); +int disasm_init(void); +#else +static inline void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, - const char *arch, const char *disassembler_options); + const char *arch, const char *disassembler_options, + const struct btf *btf, + const struct bpf_prog_linfo *prog_linfo, + __u64 func_ksym, unsigned int func_idx, + bool linum) +{ +} +static inline int disasm_init(void) +{ + p_err("No libbfd support"); + return -1; +} +#endif void print_data_json(uint8_t *data, size_t len); void print_hex_data_json(uint8_t *data, size_t len); @@ -170,6 +200,14 @@ struct btf_dumper { */ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, const void *data); +void btf_dumper_type_only(const struct btf *btf, __u32 func_type_id, + char *func_only, int size); + +void btf_dump_linfo_plain(const struct btf *btf, + const struct bpf_line_info *linfo, + const char *prefix, bool linum); +void btf_dump_linfo_json(const struct btf *btf, + const struct bpf_line_info *linfo, bool linum); struct nlattr; struct ifinfomsg; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 7bf38f0e152e..2037e3dc864b 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #include <assert.h> #include <errno.h> @@ -52,28 +22,30 @@ #include "main.h" static const char * const map_type_name[] = { - [BPF_MAP_TYPE_UNSPEC] = "unspec", - [BPF_MAP_TYPE_HASH] = "hash", - [BPF_MAP_TYPE_ARRAY] = "array", - [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", - [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", - [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", - [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", - [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", - [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", - [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", - [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", - [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", - [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", - [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", - [BPF_MAP_TYPE_DEVMAP] = "devmap", - [BPF_MAP_TYPE_SOCKMAP] = "sockmap", - [BPF_MAP_TYPE_CPUMAP] = "cpumap", - [BPF_MAP_TYPE_XSKMAP] = "xskmap", - [BPF_MAP_TYPE_SOCKHASH] = "sockhash", - [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", - [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", + [BPF_MAP_TYPE_UNSPEC] = "unspec", + [BPF_MAP_TYPE_HASH] = "hash", + [BPF_MAP_TYPE_ARRAY] = "array", + [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array", + [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array", + [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash", + [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array", + [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace", + [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array", + [BPF_MAP_TYPE_LRU_HASH] = "lru_hash", + [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash", + [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie", + [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", + [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", + [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_SOCKMAP] = "sockmap", + [BPF_MAP_TYPE_CPUMAP] = "cpumap", + [BPF_MAP_TYPE_XSKMAP] = "xskmap", + [BPF_MAP_TYPE_SOCKHASH] = "sockhash", + [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage", + [BPF_MAP_TYPE_QUEUE] = "queue", + [BPF_MAP_TYPE_STACK] = "stack", }; static bool map_is_per_cpu(__u32 type) @@ -215,70 +187,6 @@ err_end_obj: return ret; } -static int get_btf(struct bpf_map_info *map_info, struct btf **btf) -{ - struct bpf_btf_info btf_info = { 0 }; - __u32 len = sizeof(btf_info); - __u32 last_size; - int btf_fd; - void *ptr; - int err; - - err = 0; - *btf = NULL; - btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id); - if (btf_fd < 0) - return 0; - - /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so - * let's start with a sane default - 4KiB here - and resize it only if - * bpf_obj_get_info_by_fd() needs a bigger buffer. - */ - btf_info.btf_size = 4096; - last_size = btf_info.btf_size; - ptr = malloc(last_size); - if (!ptr) { - err = -ENOMEM; - goto exit_free; - } - - bzero(ptr, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - - if (!err && btf_info.btf_size > last_size) { - void *temp_ptr; - - last_size = btf_info.btf_size; - temp_ptr = realloc(ptr, last_size); - if (!temp_ptr) { - err = -ENOMEM; - goto exit_free; - } - ptr = temp_ptr; - bzero(ptr, last_size); - btf_info.btf = ptr_to_u64(ptr); - err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); - } - - if (err || btf_info.btf_size > last_size) { - err = errno; - goto exit_free; - } - - *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); - if (IS_ERR(*btf)) { - err = PTR_ERR(*btf); - *btf = NULL; - } - -exit_free: - close(btf_fd); - free(ptr); - - return err; -} - static json_writer_t *get_btf_writer(void) { json_writer_t *jw = jsonw_new(stdout); @@ -383,7 +291,10 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, printf(single_line ? " " : "\n"); printf("value:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, value, info->value_size, " "); + if (value) + fprint_hex(stdout, value, info->value_size, " "); + else + printf("<no entry>"); printf("\n"); } else { @@ -398,8 +309,11 @@ static void print_entry_plain(struct bpf_map_info *info, unsigned char *key, for (i = 0; i < n; i++) { printf("value (CPU %02d):%c", i, info->value_size > 16 ? '\n' : ' '); - fprint_hex(stdout, value + i * step, - info->value_size, " "); + if (value) + fprint_hex(stdout, value + i * step, + info->value_size, " "); + else + printf("<no entry>"); printf("\n"); } } @@ -543,7 +457,6 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) char *memlock; memlock = get_fdinfo(fd, "memlock"); - close(fd); jsonw_start_object(json_wtr); @@ -570,6 +483,30 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock)); free(memlock); + if (info->type == BPF_MAP_TYPE_PROG_ARRAY) { + char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); + char *owner_jited = get_fdinfo(fd, "owner_jited"); + + if (owner_prog_type) { + unsigned int prog_type = atoi(owner_prog_type); + + if (prog_type < ARRAY_SIZE(prog_type_name)) + jsonw_string_field(json_wtr, "owner_prog_type", + prog_type_name[prog_type]); + else + jsonw_uint_field(json_wtr, "owner_prog_type", + prog_type); + } + if (atoi(owner_jited)) + jsonw_bool_field(json_wtr, "owner_jited", true); + else + jsonw_bool_field(json_wtr, "owner_jited", false); + + free(owner_prog_type); + free(owner_jited); + } + close(fd); + if (!hash_empty(map_table.table)) { struct pinned_obj *obj; @@ -592,7 +529,6 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) char *memlock; memlock = get_fdinfo(fd, "memlock"); - close(fd); printf("%u: ", info->id); if (info->type < ARRAY_SIZE(map_type_name)) @@ -613,6 +549,30 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf(" memlock %sB", memlock); free(memlock); + if (info->type == BPF_MAP_TYPE_PROG_ARRAY) { + char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); + char *owner_jited = get_fdinfo(fd, "owner_jited"); + + printf("\n\t"); + if (owner_prog_type) { + unsigned int prog_type = atoi(owner_prog_type); + + if (prog_type < ARRAY_SIZE(prog_type_name)) + printf("owner_prog_type %s ", + prog_type_name[prog_type]); + else + printf("owner_prog_type %d ", prog_type); + } + if (atoi(owner_jited)) + printf("owner jited"); + else + printf("owner not jited"); + + free(owner_prog_type); + free(owner_jited); + } + close(fd); + printf("\n"); if (!hash_empty(map_table.table)) { struct pinned_obj *obj; @@ -731,7 +691,11 @@ static int dump_map_elem(int fd, void *key, void *value, jsonw_string_field(json_wtr, "error", strerror(lookup_errno)); jsonw_end_object(json_wtr); } else { - print_entry_error(map_info, key, strerror(lookup_errno)); + if (errno == ENOENT) + print_entry_plain(map_info, key, NULL); + else + print_entry_error(map_info, key, + strerror(lookup_errno)); } return 0; @@ -765,7 +729,7 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = get_btf(&info, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -909,7 +873,7 @@ static int do_lookup(int argc, char **argv) } /* here means bpf_map_lookup_elem() succeeded */ - err = get_btf(&info, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -1140,6 +1104,8 @@ static int do_create(int argc, char **argv) return -1; } + set_max_rlimit(); + fd = bpf_create_map_xattr(&attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index bdaf4062e26e..0507dfaf7a8f 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ /* This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index d441bb7035ca..db0e7de49d49 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook #define _GNU_SOURCE diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c index 4e9f4531269f..550a0f537eed 100644 --- a/tools/bpf/bpftool/netlink_dumper.c +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook #include <stdlib.h> diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h index e3516b586a34..774af6c62ef5 100644 --- a/tools/bpf/bpftool/netlink_dumper.h +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ // Copyright (C) 2018 Facebook #ifndef _NETLINK_DUMPER_H_ diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index b76b77dcfd1f..f2a545e667c4 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (C) 2018 Facebook // Author: Yonghong Song <yhs@fb.com> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 5302ee282409..2d1bb7d6ff51 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1,35 +1,5 @@ -/* - * Copyright (C) 2017-2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2017-2018 Netronome Systems, Inc. */ #define _GNU_SOURCE #include <errno.h> @@ -47,44 +17,22 @@ #include <linux/err.h> #include <bpf.h> +#include <btf.h> #include <libbpf.h> #include "cfg.h" #include "main.h" #include "xlated_dumper.h" -static const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", -}; - static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", [BPF_SK_MSG_VERDICT] = "msg_verdict", + [BPF_FLOW_DISSECTOR] = "flow_dissector", [__MAX_BPF_ATTACH_TYPE] = NULL, }; -enum bpf_attach_type parse_attach_type(const char *str) +static enum bpf_attach_type parse_attach_type(const char *str) { enum bpf_attach_type type; @@ -357,10 +305,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (!hash_empty(prog_table.table)) { struct pinned_obj *obj; - printf("\n"); hash_for_each_possible(prog_table.table, obj, hash, info->id) { if (obj->id == info->id) - printf("\tpinned %s\n", obj->path); + printf("\n\tpinned %s", obj->path); } } @@ -446,6 +393,10 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { + unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size; + void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL; + unsigned int nr_finfo, nr_linfo = 0, nr_jited_linfo = 0; + struct bpf_prog_linfo *prog_linfo = NULL; unsigned long *func_ksyms = NULL; struct bpf_prog_info info = {}; unsigned int *func_lens = NULL; @@ -454,11 +405,14 @@ static int do_dump(int argc, char **argv) unsigned int nr_func_lens; struct dump_data dd = {}; __u32 len = sizeof(info); + struct btf *btf = NULL; unsigned int buf_size; char *filepath = NULL; bool opcodes = false; bool visual = false; + char func_sig[1024]; unsigned char *buf; + bool linum = false; __u32 *member_len; __u64 *member_ptr; ssize_t n; @@ -466,6 +420,9 @@ static int do_dump(int argc, char **argv) int fd; if (is_prefix(*argv, "jited")) { + if (disasm_init()) + return -1; + member_len = &info.jited_prog_len; member_ptr = &info.jited_prog_insns; } else if (is_prefix(*argv, "xlated")) { @@ -499,6 +456,9 @@ static int do_dump(int argc, char **argv) } else if (is_prefix(*argv, "visual")) { visual = true; NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); } if (argc) { @@ -547,6 +507,43 @@ static int do_dump(int argc, char **argv) } } + nr_finfo = info.nr_func_info; + finfo_rec_size = info.func_info_rec_size; + if (nr_finfo && finfo_rec_size) { + func_info = malloc(nr_finfo * finfo_rec_size); + if (!func_info) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + linfo_rec_size = info.line_info_rec_size; + if (info.nr_line_info && linfo_rec_size && info.btf_id) { + nr_linfo = info.nr_line_info; + linfo = malloc(nr_linfo * linfo_rec_size); + if (!linfo) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + + jited_linfo_rec_size = info.jited_line_info_rec_size; + if (info.nr_jited_line_info && + jited_linfo_rec_size && + info.nr_jited_ksyms && + info.nr_jited_func_lens && + info.btf_id) { + nr_jited_linfo = info.nr_jited_line_info; + jited_linfo = malloc(nr_jited_linfo * jited_linfo_rec_size); + if (!jited_linfo) { + p_err("mem alloc failed"); + close(fd); + goto err_free; + } + } + memset(&info, 0, sizeof(info)); *member_ptr = ptr_to_u64(buf); @@ -555,6 +552,15 @@ static int do_dump(int argc, char **argv) info.nr_jited_ksyms = nr_func_ksyms; info.jited_func_lens = ptr_to_u64(func_lens); info.nr_jited_func_lens = nr_func_lens; + info.nr_func_info = nr_finfo; + info.func_info_rec_size = finfo_rec_size; + info.func_info = ptr_to_u64(func_info); + info.nr_line_info = nr_linfo; + info.line_info_rec_size = linfo_rec_size; + info.line_info = ptr_to_u64(linfo); + info.nr_jited_line_info = nr_jited_linfo; + info.jited_line_info_rec_size = jited_linfo_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); err = bpf_obj_get_info_by_fd(fd, &info, &len); close(fd); @@ -578,6 +584,42 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.nr_func_info != nr_finfo) { + p_err("incorrect nr_func_info %d vs. expected %d", + info.nr_func_info, nr_finfo); + goto err_free; + } + + if (info.func_info_rec_size != finfo_rec_size) { + p_err("incorrect func_info_rec_size %d vs. expected %d", + info.func_info_rec_size, finfo_rec_size); + goto err_free; + } + + if (linfo && info.nr_line_info != nr_linfo) { + p_err("incorrect nr_line_info %u vs. expected %u", + info.nr_line_info, nr_linfo); + goto err_free; + } + + if (info.line_info_rec_size != linfo_rec_size) { + p_err("incorrect line_info_rec_size %u vs. expected %u", + info.line_info_rec_size, linfo_rec_size); + goto err_free; + } + + if (jited_linfo && info.nr_jited_line_info != nr_jited_linfo) { + p_err("incorrect nr_jited_line_info %u vs. expected %u", + info.nr_jited_line_info, nr_jited_linfo); + goto err_free; + } + + if (info.jited_line_info_rec_size != jited_linfo_rec_size) { + p_err("incorrect jited_line_info_rec_size %u vs. expected %u", + info.jited_line_info_rec_size, jited_linfo_rec_size); + goto err_free; + } + if ((member_len == &info.jited_prog_len && info.jited_prog_insns == 0) || (member_len == &info.xlated_prog_len && @@ -586,6 +628,17 @@ static int do_dump(int argc, char **argv) goto err_free; } + if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) { + p_err("failed to get btf"); + goto err_free; + } + + if (nr_linfo) { + prog_linfo = bpf_prog_linfo__new(&info); + if (!prog_linfo) + p_info("error in processing bpf_line_info. continue without it."); + } + if (filepath) { fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (fd < 0) { @@ -618,6 +671,7 @@ static int do_dump(int argc, char **argv) if (info.nr_jited_func_lens && info.jited_func_lens) { struct kernel_sym *sym = NULL; + struct bpf_func_info *record; char sym_name[SYM_MAX_NAME]; unsigned char *img = buf; __u64 *ksyms = NULL; @@ -644,17 +698,33 @@ static int do_dump(int argc, char **argv) strcpy(sym_name, "unknown"); } + if (func_info) { + record = func_info + i * finfo_rec_size; + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + } + if (json_output) { jsonw_start_object(json_wtr); + if (func_info && func_sig[0] != '\0') { + jsonw_name(json_wtr, "proto"); + jsonw_string(json_wtr, func_sig); + } jsonw_name(json_wtr, "name"); jsonw_string(json_wtr, sym_name); jsonw_name(json_wtr, "insns"); } else { + if (func_info && func_sig[0] != '\0') + printf("%s:\n", func_sig); printf("%s:\n", sym_name); } - disasm_print_insn(img, lens[i], opcodes, name, - disasm_opt); + disasm_print_insn(img, lens[i], opcodes, + name, disasm_opt, btf, + prog_linfo, ksyms[i], i, + linum); + img += lens[i]; if (json_output) @@ -667,7 +737,7 @@ static int do_dump(int argc, char **argv) jsonw_end_array(json_wtr); } else { disasm_print_insn(buf, *member_len, opcodes, name, - disasm_opt); + disasm_opt, btf, NULL, 0, 0, false); } } else if (visual) { if (json_output) @@ -678,23 +748,37 @@ static int do_dump(int argc, char **argv) kernel_syms_load(&dd); dd.nr_jited_ksyms = info.nr_jited_ksyms; dd.jited_ksyms = (__u64 *) info.jited_ksyms; + dd.btf = btf; + dd.func_info = func_info; + dd.finfo_rec_size = finfo_rec_size; + dd.prog_linfo = prog_linfo; if (json_output) - dump_xlated_json(&dd, buf, *member_len, opcodes); + dump_xlated_json(&dd, buf, *member_len, opcodes, + linum); else - dump_xlated_plain(&dd, buf, *member_len, opcodes); + dump_xlated_plain(&dd, buf, *member_len, opcodes, + linum); kernel_syms_destroy(&dd); } free(buf); free(func_ksyms); free(func_lens); + free(func_info); + free(linfo); + free(jited_linfo); + bpf_prog_linfo__free(prog_linfo); return 0; err_free: free(buf); free(func_ksyms); free(func_lens); + free(func_info); + free(linfo); + free(jited_linfo); + bpf_prog_linfo__free(prog_linfo); return -1; } @@ -714,37 +798,56 @@ struct map_replace { char *name; }; -int map_replace_compar(const void *p1, const void *p2) +static int map_replace_compar(const void *p1, const void *p2) { const struct map_replace *a = p1, *b = p2; return a->idx - b->idx; } -static int do_attach(int argc, char **argv) +static int parse_attach_detach_args(int argc, char **argv, int *progfd, + enum bpf_attach_type *attach_type, + int *mapfd) { - enum bpf_attach_type attach_type; - int err, mapfd, progfd; - - if (!REQ_ARGS(5)) { - p_err("too few parameters for map attach"); + if (!REQ_ARGS(3)) return -EINVAL; - } - progfd = prog_parse_fd(&argc, &argv); - if (progfd < 0) - return progfd; + *progfd = prog_parse_fd(&argc, &argv); + if (*progfd < 0) + return *progfd; - attach_type = parse_attach_type(*argv); - if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type"); + *attach_type = parse_attach_type(*argv); + if (*attach_type == __MAX_BPF_ATTACH_TYPE) { + p_err("invalid attach/detach type"); return -EINVAL; } + + if (*attach_type == BPF_FLOW_DISSECTOR) { + *mapfd = -1; + return 0; + } + NEXT_ARG(); + if (!REQ_ARGS(2)) + return -EINVAL; + + *mapfd = map_parse_fd(&argc, &argv); + if (*mapfd < 0) + return *mapfd; + + return 0; +} - mapfd = map_parse_fd(&argc, &argv); - if (mapfd < 0) - return mapfd; +static int do_attach(int argc, char **argv) +{ + enum bpf_attach_type attach_type; + int err, progfd; + int mapfd; + + err = parse_attach_detach_args(argc, argv, + &progfd, &attach_type, &mapfd); + if (err) + return err; err = bpf_prog_attach(progfd, mapfd, attach_type, 0); if (err) { @@ -760,27 +863,13 @@ static int do_attach(int argc, char **argv) static int do_detach(int argc, char **argv) { enum bpf_attach_type attach_type; - int err, mapfd, progfd; - - if (!REQ_ARGS(5)) { - p_err("too few parameters for map detach"); - return -EINVAL; - } + int err, progfd; + int mapfd; - progfd = prog_parse_fd(&argc, &argv); - if (progfd < 0) - return progfd; - - attach_type = parse_attach_type(*argv); - if (attach_type == __MAX_BPF_ATTACH_TYPE) { - p_err("invalid attach type"); - return -EINVAL; - } - NEXT_ARG(); - - mapfd = map_parse_fd(&argc, &argv); - if (mapfd < 0) - return mapfd; + err = parse_attach_detach_args(argc, argv, + &progfd, &attach_type, &mapfd); + if (err) + return err; err = bpf_prog_detach2(progfd, mapfd, attach_type); if (err) { @@ -792,15 +881,17 @@ static int do_detach(int argc, char **argv) jsonw_null(json_wtr); return 0; } -static int do_load(int argc, char **argv) + +static int load_with_options(int argc, char **argv, bool first_prog_only) { enum bpf_attach_type expected_attach_type; struct bpf_object_open_attr attr = { .prog_type = BPF_PROG_TYPE_UNSPEC, }; struct map_replace *map_replace = NULL; + struct bpf_program *prog = NULL, *pos; unsigned int old_map_fds = 0; - struct bpf_program *prog; + const char *pinmaps = NULL; struct bpf_object *obj; struct bpf_map *map; const char *pinfile; @@ -845,6 +936,7 @@ static int do_load(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(*argv, "map")) { + void *new_map_replace; char *endptr, *name; int fd; @@ -878,12 +970,15 @@ static int do_load(int argc, char **argv) if (fd < 0) goto err_free_reuse_maps; - map_replace = reallocarray(map_replace, old_map_fds + 1, - sizeof(*map_replace)); - if (!map_replace) { + new_map_replace = reallocarray(map_replace, + old_map_fds + 1, + sizeof(*map_replace)); + if (!new_map_replace) { p_err("mem alloc failed"); goto err_free_reuse_maps; } + map_replace = new_map_replace; + map_replace[old_map_fds].idx = idx; map_replace[old_map_fds].name = name; map_replace[old_map_fds].fd = fd; @@ -905,6 +1000,13 @@ static int do_load(int argc, char **argv) goto err_free_reuse_maps; } NEXT_ARG(); + } else if (is_prefix(*argv, "pinmaps")) { + NEXT_ARG(); + + if (!REQ_ARGS(1)) + goto err_free_reuse_maps; + + pinmaps = GET_ARG(); } else { p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?", *argv); @@ -918,26 +1020,25 @@ static int do_load(int argc, char **argv) goto err_free_reuse_maps; } - prog = bpf_program__next(NULL, obj); - if (!prog) { - p_err("object file doesn't contain any bpf program"); - goto err_close_obj; - } + bpf_object__for_each_program(pos, obj) { + enum bpf_prog_type prog_type = attr.prog_type; - bpf_program__set_ifindex(prog, ifindex); - if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { - const char *sec_name = bpf_program__title(prog, false); + if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) { + const char *sec_name = bpf_program__title(pos, false); - err = libbpf_prog_type_by_name(sec_name, &attr.prog_type, - &expected_attach_type); - if (err < 0) { - p_err("failed to guess program type based on section name %s\n", - sec_name); - goto err_close_obj; + err = libbpf_prog_type_by_name(sec_name, &prog_type, + &expected_attach_type); + if (err < 0) { + p_err("failed to guess program type based on section name %s\n", + sec_name); + goto err_close_obj; + } } + + bpf_program__set_ifindex(pos, ifindex); + bpf_program__set_type(pos, prog_type); + bpf_program__set_expected_attach_type(pos, expected_attach_type); } - bpf_program__set_type(prog, attr.prog_type); - bpf_program__set_expected_attach_type(prog, expected_attach_type); qsort(map_replace, old_map_fds, sizeof(*map_replace), map_replace_compar); @@ -995,15 +1096,47 @@ static int do_load(int argc, char **argv) goto err_close_obj; } + set_max_rlimit(); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto err_close_obj; } - if (do_pin_fd(bpf_program__fd(prog), pinfile)) + err = mount_bpffs_for_pin(pinfile); + if (err) goto err_close_obj; + if (first_prog_only) { + prog = bpf_program__next(NULL, obj); + if (!prog) { + p_err("object file doesn't contain any bpf program"); + goto err_close_obj; + } + + err = bpf_obj_pin(bpf_program__fd(prog), pinfile); + if (err) { + p_err("failed to pin program %s", + bpf_program__title(prog, false)); + goto err_close_obj; + } + } else { + err = bpf_object__pin_programs(obj, pinfile); + if (err) { + p_err("failed to pin all programs"); + goto err_close_obj; + } + } + + if (pinmaps) { + err = bpf_object__pin_maps(obj, pinmaps); + if (err) { + p_err("failed to pin all maps"); + goto err_unpin; + } + } + if (json_output) jsonw_null(json_wtr); @@ -1014,6 +1147,11 @@ static int do_load(int argc, char **argv) return 0; +err_unpin: + if (first_prog_only) + unlink(pinfile); + else + bpf_object__unpin_programs(obj, pinfile); err_close_obj: bpf_object__close(obj); err_free_reuse_maps: @@ -1023,6 +1161,16 @@ err_free_reuse_maps: return -1; } +static int do_load(int argc, char **argv) +{ + return load_with_options(argc, argv, true); +} + +static int do_loadall(int argc, char **argv) +{ + return load_with_options(argc, argv, false); +} + static int do_help(int argc, char **argv) { if (json_output) { @@ -1032,13 +1180,16 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %s %s { show | list } [PROG]\n" - " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n" - " %s %s dump jited PROG [{ file FILE | opcodes }]\n" + " %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n" + " %s %s dump jited PROG [{ file FILE | opcodes | linum }]\n" " %s %s pin PROG FILE\n" - " %s %s load OBJ FILE [type TYPE] [dev NAME] \\\n" - " [map { idx IDX | name NAME } MAP]\n" - " %s %s attach PROG ATTACH_TYPE MAP\n" - " %s %s detach PROG ATTACH_TYPE MAP\n" + " %s %s { load | loadall } OBJ PATH \\\n" + " [type TYPE] [dev NAME] \\\n" + " [map { idx IDX | name NAME } MAP]\\\n" + " [pinmaps MAP_DIR]\n" + " %s %s attach PROG ATTACH_TYPE [MAP]\n" + " %s %s detach PROG ATTACH_TYPE [MAP]\n" + " %s %s tracelog\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -1047,15 +1198,17 @@ static int do_help(int argc, char **argv) " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n" " cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n" " lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n" + " sk_reuseport | flow_dissector |\n" " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 }\n" - " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse }\n" + " ATTACH_TYPE := { msg_verdict | skb_verdict | skb_parse |\n" + " flow_dissector }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2], bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]); return 0; } @@ -1067,8 +1220,10 @@ static const struct cmd cmds[] = { { "dump", do_dump }, { "pin", do_pin }, { "load", do_load }, + { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, + { "tracelog", do_tracelog }, { 0 } }; diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c new file mode 100644 index 000000000000..e80a5c79b38f --- /dev/null +++ b/tools/bpf/bpftool/tracelog.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2015-2017 Daniel Borkmann */ +/* Copyright (c) 2018 Netronome Systems, Inc. */ + +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <linux/magic.h> +#include <sys/fcntl.h> +#include <sys/vfs.h> + +#include "main.h" + +#ifndef TRACEFS_MAGIC +# define TRACEFS_MAGIC 0x74726163 +#endif + +#define _textify(x) #x +#define textify(x) _textify(x) + +FILE *trace_pipe_fd; +char *buff; + +static int validate_tracefs_mnt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return -ENOENT; + if ((unsigned long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static bool +find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt) +{ + size_t src_len; + + if (validate_tracefs_mnt(mntpt, magic)) + return false; + + src_len = strlen(mntpt); + if (src_len + 1 >= PATH_MAX) { + p_err("tracefs mount point name too long"); + return false; + } + + strcpy(mnt, mntpt); + return true; +} + +static bool get_tracefs_pipe(char *mnt) +{ + static const char * const known_mnts[] = { + "/sys/kernel/debug/tracing", + "/sys/kernel/tracing", + "/tracing", + "/trace", + }; + const char *pipe_name = "/trace_pipe"; + const char *fstype = "tracefs"; + char type[100], format[32]; + const char * const *ptr; + bool found = false; + FILE *fp; + + for (ptr = known_mnts; ptr < known_mnts + ARRAY_SIZE(known_mnts); ptr++) + if (find_tracefs_mnt_single(TRACEFS_MAGIC, mnt, *ptr)) + goto exit_found; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return false; + + /* Allow room for NULL terminating byte and pipe file name */ + snprintf(format, sizeof(format), "%%*s %%%zds %%99s %%*s %%*d %%*d\\n", + PATH_MAX - strlen(pipe_name) - 1); + while (fscanf(fp, format, mnt, type) == 2) + if (strcmp(type, fstype) == 0) { + found = true; + break; + } + fclose(fp); + + /* The string from fscanf() might be truncated, check mnt is valid */ + if (found && validate_tracefs_mnt(mnt, TRACEFS_MAGIC)) + goto exit_found; + + if (block_mount) + return false; + + p_info("could not find tracefs, attempting to mount it now"); + /* Most of the time, tracefs is automatically mounted by debugfs at + * /sys/kernel/debug/tracing when we try to access it. If we could not + * find it, it is likely that debugfs is not mounted. Let's give one + * attempt at mounting just tracefs at /sys/kernel/tracing. + */ + strcpy(mnt, known_mnts[1]); + if (mount_tracefs(mnt)) + return false; + +exit_found: + strcat(mnt, pipe_name); + return true; +} + +static void exit_tracelog(int signum) +{ + fclose(trace_pipe_fd); + free(buff); + + if (json_output) { + jsonw_end_array(json_wtr); + jsonw_destroy(&json_wtr); + } + + exit(0); +} + +int do_tracelog(int argc, char **argv) +{ + const struct sigaction act = { + .sa_handler = exit_tracelog + }; + char trace_pipe[PATH_MAX]; + size_t buff_len = 0; + + if (json_output) + jsonw_start_array(json_wtr); + + if (!get_tracefs_pipe(trace_pipe)) + return -1; + + trace_pipe_fd = fopen(trace_pipe, "r"); + if (!trace_pipe_fd) { + p_err("could not open trace pipe: %s", strerror(errno)); + return -1; + } + + sigaction(SIGHUP, &act, NULL); + sigaction(SIGINT, &act, NULL); + sigaction(SIGTERM, &act, NULL); + while (1) { + ssize_t ret; + + ret = getline(&buff, &buff_len, trace_pipe_fd); + if (ret <= 0) { + p_err("failed to read content from trace pipe: %s", + strerror(errno)); + break; + } + if (json_output) + jsonw_string(json_wtr, buff); + else + printf("%s", buff); + } + + fclose(trace_pipe_fd); + free(buff); + return -1; +} diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 3284759df98a..7073dbe1ff27 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -1,39 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #define _GNU_SOURCE #include <stdarg.h> @@ -41,6 +7,7 @@ #include <stdlib.h> #include <string.h> #include <sys/types.h> +#include <libbpf.h> #include "disasm.h" #include "json_writer.h" @@ -114,7 +81,7 @@ struct kernel_sym *kernel_syms_search(struct dump_data *dd, sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL; } -static void print_insn(void *private_data, const char *fmt, ...) +static void __printf(2, 3) print_insn(void *private_data, const char *fmt, ...) { va_list args; @@ -123,7 +90,7 @@ static void print_insn(void *private_data, const char *fmt, ...) va_end(args); } -static void +static void __printf(2, 3) print_insn_for_graph(void *private_data, const char *fmt, ...) { char buf[64], *p; @@ -154,7 +121,8 @@ print_insn_for_graph(void *private_data, const char *fmt, ...) printf("%s", buf); } -static void print_insn_json(void *private_data, const char *fmt, ...) +static void __printf(2, 3) +print_insn_json(void *private_data, const char *fmt, ...) { unsigned int l = strlen(fmt); char chomped_fmt[l]; @@ -234,19 +202,25 @@ static const char *print_imm(void *private_data, } void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes) + bool opcodes, bool linum) { + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; const struct bpf_insn_cbs cbs = { .cb_print = print_insn_json, .cb_call = print_call, .cb_imm = print_imm, .private_data = dd, }; + struct bpf_func_info *record; struct bpf_insn *insn = buf; + struct btf *btf = dd->btf; bool double_insn = false; + unsigned int nr_skip = 0; + char func_sig[1024]; unsigned int i; jsonw_start_array(json_wtr); + record = dd->func_info; for (i = 0; i < len / sizeof(*insn); i++) { if (double_insn) { double_insn = false; @@ -255,6 +229,30 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); jsonw_start_object(json_wtr); + + if (btf && record) { + if (record->insn_off == i) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') { + jsonw_name(json_wtr, "proto"); + jsonw_string(json_wtr, func_sig); + } + record = (void *)record + dd->finfo_rec_size; + } + } + + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, i, nr_skip); + if (linfo) { + btf_dump_linfo_json(btf, linfo, linum); + nr_skip++; + } + } + jsonw_name(json_wtr, "disasm"); print_bpf_insn(&cbs, insn + i, true); @@ -289,24 +287,52 @@ void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, } void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes) + bool opcodes, bool linum) { + const struct bpf_prog_linfo *prog_linfo = dd->prog_linfo; const struct bpf_insn_cbs cbs = { .cb_print = print_insn, .cb_call = print_call, .cb_imm = print_imm, .private_data = dd, }; + struct bpf_func_info *record; struct bpf_insn *insn = buf; + struct btf *btf = dd->btf; + unsigned int nr_skip = 0; bool double_insn = false; + char func_sig[1024]; unsigned int i; + record = dd->func_info; for (i = 0; i < len / sizeof(*insn); i++) { if (double_insn) { double_insn = false; continue; } + if (btf && record) { + if (record->insn_off == i) { + btf_dumper_type_only(btf, record->type_id, + func_sig, + sizeof(func_sig)); + if (func_sig[0] != '\0') + printf("%s:\n", func_sig); + record = (void *)record + dd->finfo_rec_size; + } + } + + if (prog_linfo) { + const struct bpf_line_info *linfo; + + linfo = bpf_prog_linfo__lfind(prog_linfo, i, nr_skip); + if (linfo) { + btf_dump_linfo_plain(btf, linfo, "; ", + linum); + nr_skip++; + } + } + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); printf("% 4d: ", i); diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h index 33d86e2b369b..54847e174273 100644 --- a/tools/bpf/bpftool/xlated_dumper.h +++ b/tools/bpf/bpftool/xlated_dumper.h @@ -1,45 +1,13 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* - * Copyright (C) 2018 Netronome Systems, Inc. - * - * This software is dual licensed under the GNU General License Version 2, - * June 1991 as shown in the file COPYING in the top-level directory of this - * source tree or the BSD 2-Clause License provided below. You have the - * option to license this software under the complete terms of either license. - * - * The BSD 2-Clause License: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * 1. Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * 2. Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc. */ #ifndef __BPF_TOOL_XLATED_DUMPER_H #define __BPF_TOOL_XLATED_DUMPER_H #define SYM_MAX_NAME 256 +struct bpf_prog_linfo; + struct kernel_sym { unsigned long address; char name[SYM_MAX_NAME]; @@ -51,6 +19,10 @@ struct dump_data { __u32 sym_count; __u64 *jited_ksyms; __u32 nr_jited_ksyms; + struct btf *btf; + void *func_info; + __u32 finfo_rec_size; + const struct bpf_prog_linfo *prog_linfo; char scratch_buff[SYM_MAX_NAME + 8]; }; @@ -58,9 +30,9 @@ void kernel_syms_load(struct dump_data *dd); void kernel_syms_destroy(struct dump_data *dd); struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key); void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes); + bool opcodes, bool linum); void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len, - bool opcodes); + bool opcodes, bool linum); void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end, unsigned int start_index); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index f216b2f5c3d7..d74bb9414d7c 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -33,6 +33,7 @@ FEATURE_TESTS_BASIC := \ dwarf_getlocations \ fortify-source \ sync-compare-and-swap \ + get_current_dir_name \ glibc \ gtk2 \ gtk2-infobar \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 0516259be70f..304b984f11b9 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -7,6 +7,7 @@ FILES= \ test-dwarf_getlocations.bin \ test-fortify-source.bin \ test-sync-compare-and-swap.bin \ + test-get_current_dir_name.bin \ test-glibc.bin \ test-gtk2.bin \ test-gtk2-infobar.bin \ @@ -101,6 +102,9 @@ $(OUTPUT)test-bionic.bin: $(OUTPUT)test-libelf.bin: $(BUILD) -lelf +$(OUTPUT)test-get_current_dir_name.bin: + $(BUILD) + $(OUTPUT)test-glibc.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 8dc20a61341f..56722bfe6bdd 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -34,6 +34,10 @@ # include "test-libelf-mmap.c" #undef main +#define main main_test_get_current_dir_name +# include "test-get_current_dir_name.c" +#undef main + #define main main_test_glibc # include "test-glibc.c" #undef main @@ -174,6 +178,7 @@ int main(int argc, char *argv[]) main_test_hello(); main_test_libelf(); main_test_libelf_mmap(); + main_test_get_current_dir_name(); main_test_glibc(); main_test_dwarf(); main_test_dwarf_getlocations(); diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c new file mode 100644 index 000000000000..573000f93212 --- /dev/null +++ b/tools/build/feature/test-get_current_dir_name.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <unistd.h> +#include <stdlib.h> + +int main(void) +{ + free(get_current_dir_name()); + return 0; +} diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h index 040651735662..cdc9f4ca8c27 100644 --- a/tools/include/uapi/asm-generic/ioctls.h +++ b/tools/include/uapi/asm-generic/ioctls.h @@ -79,6 +79,8 @@ #define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */ #define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */ #define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */ +#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816) +#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816) #define FIONCLEX 0x5450 #define FIOCLEX 0x5451 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 7f5634ce8e88..a4446f452040 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 +/* + * Once upon a time we supposed that writes through the GGTT would be + * immediately in physical memory (once flushed out of the CPU path). However, + * on a few different processors and chipsets, this is not necessarily the case + * as the writes appear to be buffered internally. Thus a read of the backing + * storage (physical memory) via a different path (with different physical tags + * to the indirect write via the GGTT) will see stale values from before + * the GGTT write. Inside the kernel, we can for the most part keep track of + * the different read/write domains in use (e.g. set-domain), but the assumption + * of coherency is baked into the ABI, hence reporting its true state in this + * parameter. + * + * Reports true when writes via mmap_gtt are immediately visible following an + * lfence to flush the WCB. + * + * Reports false when writes via mmap_gtt are indeterminately delayed in an in + * internal buffer and are _not_ immediately visible to third parties accessing + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC + * communications channel when reporting false is strongly disadvised. + */ +#define I915_PARAM_MMAP_GTT_COHERENT 52 + typedef struct drm_i915_getparam { __s32 param; /* diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 852dc17ab47a..91c43884f295 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -133,6 +133,14 @@ enum bpf_map_type { BPF_MAP_TYPE_STACK, }; +/* Note that tracing related programs such as + * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT} + * are not subject to a stable API since kernel internal data + * structures can change from release to release and may + * therefore break existing tracing BPF programs. Tracing BPF + * programs correspond to /a/ specific kernel which is to be + * analyzed, and not /a/ specific kernel /and/ all future ones. + */ enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, @@ -232,6 +240,20 @@ enum bpf_attach_type { */ #define BPF_F_STRICT_ALIGNMENT (1U << 0) +/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the + * verifier will allow any alignment whatsoever. On platforms + * with strict alignment requirements for loads ands stores (such + * as sparc and mips) the verifier validates that all loads and + * stores provably follow this requirement. This flag turns that + * checking and enforcement off. + * + * It is mostly used for testing when we want to validate the + * context and memory access aspects of the verifier, but because + * of an unaligned access the alignment check would trigger before + * the one we are interested in. + */ +#define BPF_F_ANY_ALIGNMENT (1U << 1) + /* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */ #define BPF_PSEUDO_MAP_FD 1 @@ -257,9 +279,6 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) -/* flags for BPF_PROG_QUERY */ -#define BPF_F_QUERY_EFFECTIVE (1U << 0) - #define BPF_OBJ_NAME_LEN 16U /* Flags for accessing BPF object */ @@ -269,6 +288,12 @@ enum bpf_attach_type { /* Flag for stack_map, store build_id+offset instead of pointer */ #define BPF_F_STACK_BUILD_ID (1U << 5) +/* Zero-initialize hash function seed. This should only be used for testing. */ +#define BPF_F_ZERO_SEED (1U << 6) + +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -326,7 +351,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ - __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 kern_version; /* not used */ __u32 prog_flags; char prog_name[BPF_OBJ_NAME_LEN]; __u32 prog_ifindex; /* ifindex of netdev to prep for */ @@ -335,6 +360,13 @@ union bpf_attr { * (context accesses, allowed helpers, etc). */ __u32 expected_attach_type; + __u32 prog_btf_fd; /* fd pointing to BTF type data */ + __u32 func_info_rec_size; /* userspace bpf_func_info size */ + __aligned_u64 func_info; /* func info */ + __u32 func_info_cnt; /* number of bpf_func_info records */ + __u32 line_info_rec_size; /* userspace bpf_line_info size */ + __aligned_u64 line_info; /* line info */ + __u32 line_info_cnt; /* number of bpf_line_info records */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -353,8 +385,11 @@ union bpf_attr { struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ __u32 prog_fd; __u32 retval; - __u32 data_size_in; - __u32 data_size_out; + __u32 data_size_in; /* input: len of data_in */ + __u32 data_size_out; /* input/output: len of data_out + * returns ENOSPC if data_out + * is too small. + */ __aligned_u64 data_in; __aligned_u64 data_out; __u32 repeat; @@ -475,18 +510,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) - * Description - * Pop an element from *map*. - * Return - * 0 on success, or a negative error in case of failure. - * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) - * Description - * Get an element from *map* without removing it. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1910,9 +1933,9 @@ union bpf_attr { * is set to metric from route (IPv4/IPv6 only), and ifindex * is set to the device index of the nexthop from the FIB lookup. * - * *plen* argument is the size of the passed in struct. - * *flags* argument can be a combination of one or more of the - * following values: + * *plen* argument is the size of the passed in struct. + * *flags* argument can be a combination of one or more of the + * following values: * * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB @@ -1921,9 +1944,9 @@ union bpf_attr { * Perform lookup from an egress perspective (default is * ingress). * - * *ctx* is either **struct xdp_md** for XDP programs or - * **struct sk_buff** tc cls_act programs. - * Return + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct sk_buff** tc cls_act programs. + * Return * * < 0 if any input argument is invalid * * 0 on success (packet is forwarded, nexthop neighbor exists) * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the @@ -2068,8 +2091,8 @@ union bpf_attr { * translated to a keycode using the rc keymap, and reported as * an input key down event. After a period a key up event is * generated. This period can be extended by calling either - * **bpf_rc_keydown** () again with the same values, or calling - * **bpf_rc_repeat** (). + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * * Some protocols include a toggle bit, in case the button was * released and pressed again between consecutive scancodes. @@ -2152,29 +2175,30 @@ union bpf_attr { * The *flags* meaning is specific for each map type, * and has to be 0 for cgroup local storage. * - * Depending on the bpf program type, a local storage area - * can be shared between multiple instances of the bpf program, + * Depending on the BPF program type, a local storage area + * can be shared between multiple instances of the BPF program, * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the BPF_STX_XADD instruction to alter + * For example, by using the **BPF_STX_XADD** instruction to alter * the shared data. * Return - * Pointer to the local storage area. + * A pointer to the local storage area. * * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description - * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map - * It checks the selected sk is matching the incoming - * request in the skb. + * Select a **SO_REUSEPORT** socket from a + * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. + * It checks the selected socket is matching the incoming + * request in the socket buffer. * Return * 0 on success, or a negative error in case of failure. * - * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2187,12 +2211,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2200,13 +2226,15 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags) + * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for UDP socket matching *tuple*, optionally in a child * network namespace *netns*. The return value must be checked, - * and if non-NULL, released via **bpf_sk_release**\ (). + * and if non-**NULL**, released via **bpf_sk_release**\ (). * * The *ctx* should point to the context of the program, such as * the skb or socket (depending on the hook in use). This is used @@ -2219,12 +2247,14 @@ union bpf_attr { * **sizeof**\ (*tuple*\ **->ipv6**) * Look for an IPv6 socket. * - * If the *netns* is zero, then the socket lookup table in the - * netns associated with the *ctx* will be used. For the TC hooks, - * this in the netns of the device in the skb. For socket hooks, - * this in the netns of the socket. If *netns* is non-zero, then - * it specifies the ID of the netns relative to the netns - * associated with the *ctx*. + * If the *netns* is a negative signed 32-bit integer, then the + * socket lookup table in the netns associated with the *ctx* will + * will be used. For the TC hooks, this is the netns of the device + * in the skb. For socket hooks, this is the netns of the socket. + * If *netns* is any other signed 32-bit value greater than or + * equal to zero then it specifies the ID of the netns relative to + * the netns associated with the *ctx*. *netns* values beyond the + * range of 32-bit integers are reserved for future use. * * All values for *flags* are reserved for future usage, and must * be left at zero. @@ -2232,31 +2262,71 @@ union bpf_attr { * This helper is available only if the kernel was compiled with * **CONFIG_NET** configuration option. * Return - * Pointer to *struct bpf_sock*, or NULL in case of failure. + * Pointer to **struct bpf_sock**, or **NULL** in case of failure. + * For sockets with reuseport option, the **struct bpf_sock** + * result is from **reuse->socks**\ [] using the hash of the tuple. * - * int bpf_sk_release(struct bpf_sock *sk) + * int bpf_sk_release(struct bpf_sock *sock) * Description - * Release the reference held by *sock*. *sock* must be a non-NULL - * pointer that was returned from bpf_sk_lookup_xxx\ (). + * Release the reference held by *sock*. *sock* must be a + * non-**NULL** pointer that was returned from + * **bpf_sk_lookup_xxx**\ (). * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags) * Description - * For socket policies, insert *len* bytes into msg at offset + * For socket policies, insert *len* bytes into *msg* at offset * *start*. * * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a - * *msg* it may want to insert metadata or options into the msg. + * *msg* it may want to insert metadata or options into the *msg*. * This can later be read and used by any of the lower layer BPF * hooks. * * This helper may fail if under memory pressure (a malloc * fails) in these cases BPF programs will get an appropriate * error and BPF programs will need to handle them. + * Return + * 0 on success, or a negative error in case of failure. * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of *msg* + * payload and/or *pop* value being to large. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * Description + * This helper is used in programs implementing IR decoding, to + * report a successfully decoded pointer movement. + * + * The *ctx* should point to the lirc sample as passed into + * the program. + * + * This helper is only available is the kernel was compiled with + * the **CONFIG_BPF_LIRC_MODE2** configuration option set to + * "**y**". + * Return + * 0 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2349,7 +2419,9 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), \ + FN(rc_pointer_rel), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2405,6 +2477,9 @@ enum bpf_func_id { /* BPF_FUNC_perf_event_output for sk_buff input context. */ #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) +/* Current network namespace */ +#define BPF_F_CURRENT_NETNS (-1L) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, @@ -2422,6 +2497,12 @@ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6_INLINE }; +#define __bpf_md_ptr(type, name) \ +union { \ + type name; \ + __u64 :64; \ +} __attribute__((aligned(8))) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -2456,7 +2537,9 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; - struct bpf_flow_keys *flow_keys; + __bpf_md_ptr(struct bpf_flow_keys *, flow_keys); + __u64 tstamp; + __u32 wire_len; }; struct bpf_tunnel_key { @@ -2572,8 +2655,8 @@ enum sk_action { * be added to the end of this structure */ struct sk_msg_md { - void *data; - void *data_end; + __bpf_md_ptr(void *, data); + __bpf_md_ptr(void *, data_end); __u32 family; __u32 remote_ip4; /* Stored in network byte order */ @@ -2582,6 +2665,7 @@ struct sk_msg_md { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + __u32 size; /* Total size of sk_msg */ }; struct sk_reuseport_md { @@ -2589,8 +2673,9 @@ struct sk_reuseport_md { * Start of directly accessible data. It begins from * the tcp/udp header. */ - void *data; - void *data_end; /* End of directly accessible data */ + __bpf_md_ptr(void *, data); + /* End of directly accessible data */ + __bpf_md_ptr(void *, data_end); /* * Total length of packet (starting from the tcp/udp header). * Note that the directly accessible bytes (data_end - data) @@ -2631,6 +2716,18 @@ struct bpf_prog_info { __u32 nr_jited_func_lens; __aligned_u64 jited_ksyms; __aligned_u64 jited_func_lens; + __u32 btf_id; + __u32 func_info_rec_size; + __aligned_u64 func_info; + __u32 nr_func_info; + __u32 nr_line_info; + __aligned_u64 line_info; + __aligned_u64 jited_line_info; + __u32 nr_jited_line_info; + __u32 line_info_rec_size; + __u32 jited_line_info_rec_size; + __u32 nr_prog_tags; + __aligned_u64 prog_tags; } __attribute__((aligned(8))); struct bpf_map_info { @@ -2942,4 +3039,19 @@ struct bpf_flow_keys { }; }; +struct bpf_func_info { + __u32 insn_off; + __u32 type_id; +}; + +#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10) +#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff) + +struct bpf_line_info { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 972265f32871..7b7475ef2f17 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -34,13 +34,16 @@ struct btf_type { * bits 0-15: vlen (e.g. # of struct's members) * bits 16-23: unused * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-31: unused + * bits 28-30: unused + * bit 31: kind_flag, currently used by + * struct, union and fwd */ __u32 info; /* "size" is used by INT, ENUM, STRUCT and UNION. * "size" tells the size of the type it is describing. * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT. + * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, + * FUNC and FUNC_PROTO. * "type" is a type_id referring to another type. */ union { @@ -51,6 +54,7 @@ struct btf_type { #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) #define BTF_INFO_VLEN(info) ((info) & 0xffff) +#define BTF_INFO_KFLAG(info) ((info) >> 31) #define BTF_KIND_UNKN 0 /* Unknown */ #define BTF_KIND_INT 1 /* Integer */ @@ -64,8 +68,10 @@ struct btf_type { #define BTF_KIND_VOLATILE 9 /* Volatile */ #define BTF_KIND_CONST 10 /* Const */ #define BTF_KIND_RESTRICT 11 /* Restrict */ -#define BTF_KIND_MAX 11 -#define NR_BTF_KINDS 12 +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#define BTF_KIND_MAX 13 +#define NR_BTF_KINDS 14 /* For some specific BTF_KIND, "struct btf_type" is immediately * followed by extra data. @@ -107,7 +113,29 @@ struct btf_array { struct btf_member { __u32 name_off; __u32 type; - __u32 offset; /* offset in bits */ + /* If the type info kind_flag is set, the btf_member offset + * contains both member bitfield size and bit offset. The + * bitfield size is set for bitfield members. If the type + * info kind_flag is not set, the offset contains only bit + * offset. + */ + __u32 offset; +}; + +/* If the struct/union type info kind_flag is set, the + * following two macros are used to access bitfield_size + * and bit_offset from btf_member.offset. + */ +#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24) +#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff) + +/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param". + * The exact number of btf_param is stored in the vlen (of the + * info in "struct btf_type"). + */ +struct btf_param { + __u32 name_off; + __u32 type; }; #endif /* _UAPI__LINUX_BTF_H__ */ diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h index 486ed1f0c0bc..0a4d73317759 100644 --- a/tools/include/uapi/linux/netlink.h +++ b/tools/include/uapi/linux/netlink.h @@ -155,7 +155,7 @@ enum nlmsgerr_attrs { #define NETLINK_LIST_MEMBERSHIPS 9 #define NETLINK_CAP_ACK 10 #define NETLINK_EXT_ACK 11 -#define NETLINK_DUMP_STRICT_CHK 12 +#define NETLINK_GET_STRICT_CHK 12 struct nl_pktinfo { __u32 group; diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h new file mode 100644 index 000000000000..401d0c1e612d --- /dev/null +++ b/tools/include/uapi/linux/pkt_cls.h @@ -0,0 +1,612 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_PKT_CLS_H +#define __LINUX_PKT_CLS_H + +#include <linux/types.h> +#include <linux/pkt_sched.h> + +#define TC_COOKIE_MAX_SIZE 16 + +/* Action attributes */ +enum { + TCA_ACT_UNSPEC, + TCA_ACT_KIND, + TCA_ACT_OPTIONS, + TCA_ACT_INDEX, + TCA_ACT_STATS, + TCA_ACT_PAD, + TCA_ACT_COOKIE, + __TCA_ACT_MAX +}; + +#define TCA_ACT_MAX __TCA_ACT_MAX +#define TCA_OLD_COMPAT (TCA_ACT_MAX+1) +#define TCA_ACT_MAX_PRIO 32 +#define TCA_ACT_BIND 1 +#define TCA_ACT_NOBIND 0 +#define TCA_ACT_UNBIND 1 +#define TCA_ACT_NOUNBIND 0 +#define TCA_ACT_REPLACE 1 +#define TCA_ACT_NOREPLACE 0 + +#define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 +#define TC_ACT_REDIRECT 7 +#define TC_ACT_TRAP 8 /* For hw path, this means "trap to cpu" + * and don't further process the frame + * in hardware. For sw path, this is + * equivalent of TC_ACT_STOLEN - drop + * the skb and act like everything + * is alright. + */ +#define TC_ACT_VALUE_MAX TC_ACT_TRAP + +/* There is a special kind of actions called "extended actions", + * which need a value parameter. These have a local opcode located in + * the highest nibble, starting from 1. The rest of the bits + * are used to carry the value. These two parts together make + * a combined opcode. + */ +#define __TC_ACT_EXT_SHIFT 28 +#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT) +#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1) +#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK)) +#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode) + +#define TC_ACT_JUMP __TC_ACT_EXT(1) +#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2) +#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN + +/* Action type identifiers*/ +enum { + TCA_ID_UNSPEC=0, + TCA_ID_POLICE=1, + /* other actions go here */ + __TCA_ID_MAX=255 +}; + +#define TCA_ID_MAX __TCA_ID_MAX + +struct tc_police { + __u32 index; + int action; +#define TC_POLICE_UNSPEC TC_ACT_UNSPEC +#define TC_POLICE_OK TC_ACT_OK +#define TC_POLICE_RECLASSIFY TC_ACT_RECLASSIFY +#define TC_POLICE_SHOT TC_ACT_SHOT +#define TC_POLICE_PIPE TC_ACT_PIPE + + __u32 limit; + __u32 burst; + __u32 mtu; + struct tc_ratespec rate; + struct tc_ratespec peakrate; + int refcnt; + int bindcnt; + __u32 capab; +}; + +struct tcf_t { + __u64 install; + __u64 lastuse; + __u64 expires; + __u64 firstuse; +}; + +struct tc_cnt { + int refcnt; + int bindcnt; +}; + +#define tc_gen \ + __u32 index; \ + __u32 capab; \ + int action; \ + int refcnt; \ + int bindcnt + +enum { + TCA_POLICE_UNSPEC, + TCA_POLICE_TBF, + TCA_POLICE_RATE, + TCA_POLICE_PEAKRATE, + TCA_POLICE_AVRATE, + TCA_POLICE_RESULT, + TCA_POLICE_TM, + TCA_POLICE_PAD, + __TCA_POLICE_MAX +#define TCA_POLICE_RESULT TCA_POLICE_RESULT +}; + +#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1) + +/* tca flags definitions */ +#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */ +#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ +#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ +#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ +#define TCA_CLS_FLAGS_VERBOSE (1 << 4) /* verbose logging */ + +/* U32 filters */ + +#define TC_U32_HTID(h) ((h)&0xFFF00000) +#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20) +#define TC_U32_HASH(h) (((h)>>12)&0xFF) +#define TC_U32_NODE(h) ((h)&0xFFF) +#define TC_U32_KEY(h) ((h)&0xFFFFF) +#define TC_U32_UNSPEC 0 +#define TC_U32_ROOT (0xFFF00000) + +enum { + TCA_U32_UNSPEC, + TCA_U32_CLASSID, + TCA_U32_HASH, + TCA_U32_LINK, + TCA_U32_DIVISOR, + TCA_U32_SEL, + TCA_U32_POLICE, + TCA_U32_ACT, + TCA_U32_INDEV, + TCA_U32_PCNT, + TCA_U32_MARK, + TCA_U32_FLAGS, + TCA_U32_PAD, + __TCA_U32_MAX +}; + +#define TCA_U32_MAX (__TCA_U32_MAX - 1) + +struct tc_u32_key { + __be32 mask; + __be32 val; + int off; + int offmask; +}; + +struct tc_u32_sel { + unsigned char flags; + unsigned char offshift; + unsigned char nkeys; + + __be16 offmask; + __u16 off; + short offoff; + + short hoff; + __be32 hmask; + struct tc_u32_key keys[0]; +}; + +struct tc_u32_mark { + __u32 val; + __u32 mask; + __u32 success; +}; + +struct tc_u32_pcnt { + __u64 rcnt; + __u64 rhit; + __u64 kcnts[0]; +}; + +/* Flags */ + +#define TC_U32_TERMINAL 1 +#define TC_U32_OFFSET 2 +#define TC_U32_VAROFFSET 4 +#define TC_U32_EAT 8 + +#define TC_U32_MAXDEPTH 8 + + +/* RSVP filter */ + +enum { + TCA_RSVP_UNSPEC, + TCA_RSVP_CLASSID, + TCA_RSVP_DST, + TCA_RSVP_SRC, + TCA_RSVP_PINFO, + TCA_RSVP_POLICE, + TCA_RSVP_ACT, + __TCA_RSVP_MAX +}; + +#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 ) + +struct tc_rsvp_gpi { + __u32 key; + __u32 mask; + int offset; +}; + +struct tc_rsvp_pinfo { + struct tc_rsvp_gpi dpi; + struct tc_rsvp_gpi spi; + __u8 protocol; + __u8 tunnelid; + __u8 tunnelhdr; + __u8 pad; +}; + +/* ROUTE filter */ + +enum { + TCA_ROUTE4_UNSPEC, + TCA_ROUTE4_CLASSID, + TCA_ROUTE4_TO, + TCA_ROUTE4_FROM, + TCA_ROUTE4_IIF, + TCA_ROUTE4_POLICE, + TCA_ROUTE4_ACT, + __TCA_ROUTE4_MAX +}; + +#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1) + + +/* FW filter */ + +enum { + TCA_FW_UNSPEC, + TCA_FW_CLASSID, + TCA_FW_POLICE, + TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ + TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ + TCA_FW_MASK, + __TCA_FW_MAX +}; + +#define TCA_FW_MAX (__TCA_FW_MAX - 1) + +/* TC index filter */ + +enum { + TCA_TCINDEX_UNSPEC, + TCA_TCINDEX_HASH, + TCA_TCINDEX_MASK, + TCA_TCINDEX_SHIFT, + TCA_TCINDEX_FALL_THROUGH, + TCA_TCINDEX_CLASSID, + TCA_TCINDEX_POLICE, + TCA_TCINDEX_ACT, + __TCA_TCINDEX_MAX +}; + +#define TCA_TCINDEX_MAX (__TCA_TCINDEX_MAX - 1) + +/* Flow filter */ + +enum { + FLOW_KEY_SRC, + FLOW_KEY_DST, + FLOW_KEY_PROTO, + FLOW_KEY_PROTO_SRC, + FLOW_KEY_PROTO_DST, + FLOW_KEY_IIF, + FLOW_KEY_PRIORITY, + FLOW_KEY_MARK, + FLOW_KEY_NFCT, + FLOW_KEY_NFCT_SRC, + FLOW_KEY_NFCT_DST, + FLOW_KEY_NFCT_PROTO_SRC, + FLOW_KEY_NFCT_PROTO_DST, + FLOW_KEY_RTCLASSID, + FLOW_KEY_SKUID, + FLOW_KEY_SKGID, + FLOW_KEY_VLAN_TAG, + FLOW_KEY_RXHASH, + __FLOW_KEY_MAX, +}; + +#define FLOW_KEY_MAX (__FLOW_KEY_MAX - 1) + +enum { + FLOW_MODE_MAP, + FLOW_MODE_HASH, +}; + +enum { + TCA_FLOW_UNSPEC, + TCA_FLOW_KEYS, + TCA_FLOW_MODE, + TCA_FLOW_BASECLASS, + TCA_FLOW_RSHIFT, + TCA_FLOW_ADDEND, + TCA_FLOW_MASK, + TCA_FLOW_XOR, + TCA_FLOW_DIVISOR, + TCA_FLOW_ACT, + TCA_FLOW_POLICE, + TCA_FLOW_EMATCHES, + TCA_FLOW_PERTURB, + __TCA_FLOW_MAX +}; + +#define TCA_FLOW_MAX (__TCA_FLOW_MAX - 1) + +/* Basic filter */ + +enum { + TCA_BASIC_UNSPEC, + TCA_BASIC_CLASSID, + TCA_BASIC_EMATCHES, + TCA_BASIC_ACT, + TCA_BASIC_POLICE, + __TCA_BASIC_MAX +}; + +#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + + +/* Cgroup classifier */ + +enum { + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + +/* BPF classifier */ + +#define TCA_BPF_FLAG_ACT_DIRECT (1 << 0) + +enum { + TCA_BPF_UNSPEC, + TCA_BPF_ACT, + TCA_BPF_POLICE, + TCA_BPF_CLASSID, + TCA_BPF_OPS_LEN, + TCA_BPF_OPS, + TCA_BPF_FD, + TCA_BPF_NAME, + TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, + TCA_BPF_TAG, + TCA_BPF_ID, + __TCA_BPF_MAX, +}; + +#define TCA_BPF_MAX (__TCA_BPF_MAX - 1) + +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + + TCA_FLOWER_FLAGS, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK, /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC, /* be16 */ + TCA_FLOWER_KEY_SCTP_DST, /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */ + + TCA_FLOWER_KEY_FLAGS, /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */ + + TCA_FLOWER_KEY_ARP_SIP, /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP, /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK, /* be32 */ + TCA_FLOWER_KEY_ARP_OP, /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK, /* u8 */ + TCA_FLOWER_KEY_ARP_SHA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA, /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK, /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL, /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS, /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC, /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL, /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS, /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK, /* be16 */ + + TCA_FLOWER_KEY_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID, /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS, + TCA_FLOWER_KEY_ENC_OPTS_MASK, + + TCA_FLOWER_IN_HW_COUNT, + + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPTS_UNSPEC, + TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested + * TCA_FLOWER_KEY_ENC_OPT_GENEVE_ + * attributes + */ + __TCA_FLOWER_KEY_ENC_OPTS_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1) + +enum { + TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC, + TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS, /* u16 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE, /* u8 */ + TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA, /* 4 to 128 bytes */ + + __TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX, +}; + +#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \ + (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1) + +enum { + TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), + TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), +}; + +/* Match-all classifier */ + +enum { + TCA_MATCHALL_UNSPEC, + TCA_MATCHALL_CLASSID, + TCA_MATCHALL_ACT, + TCA_MATCHALL_FLAGS, + __TCA_MATCHALL_MAX, +}; + +#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1) + +/* Extended Matches */ + +struct tcf_ematch_tree_hdr { + __u16 nmatches; + __u16 progid; +}; + +enum { + TCA_EMATCH_TREE_UNSPEC, + TCA_EMATCH_TREE_HDR, + TCA_EMATCH_TREE_LIST, + __TCA_EMATCH_TREE_MAX +}; +#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1) + +struct tcf_ematch_hdr { + __u16 matchid; + __u16 kind; + __u16 flags; + __u16 pad; /* currently unused */ +}; + +/* 0 1 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 + * +-----------------------+-+-+---+ + * | Unused |S|I| R | + * +-----------------------+-+-+---+ + * + * R(2) ::= relation to next ematch + * where: 0 0 END (last ematch) + * 0 1 AND + * 1 0 OR + * 1 1 Unused (invalid) + * I(1) ::= invert result + * S(1) ::= simple payload + */ +#define TCF_EM_REL_END 0 +#define TCF_EM_REL_AND (1<<0) +#define TCF_EM_REL_OR (1<<1) +#define TCF_EM_INVERT (1<<2) +#define TCF_EM_SIMPLE (1<<3) + +#define TCF_EM_REL_MASK 3 +#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK) + +enum { + TCF_LAYER_LINK, + TCF_LAYER_NETWORK, + TCF_LAYER_TRANSPORT, + __TCF_LAYER_MAX +}; +#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1) + +/* Ematch type assignments + * 1..32767 Reserved for ematches inside kernel tree + * 32768..65535 Free to use, not reliable + */ +#define TCF_EM_CONTAINER 0 +#define TCF_EM_CMP 1 +#define TCF_EM_NBYTE 2 +#define TCF_EM_U32 3 +#define TCF_EM_META 4 +#define TCF_EM_TEXT 5 +#define TCF_EM_VLAN 6 +#define TCF_EM_CANID 7 +#define TCF_EM_IPSET 8 +#define TCF_EM_IPT 9 +#define TCF_EM_MAX 9 + +enum { + TCF_EM_PROG_TC +}; + +enum { + TCF_EM_OPND_EQ, + TCF_EM_OPND_GT, + TCF_EM_OPND_LT +}; + +#endif diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index c0d7ea0bf5b6..b17201edfa09 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -212,6 +212,7 @@ struct prctl_mm_map { #define PR_SET_SPECULATION_CTRL 53 /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h new file mode 100644 index 000000000000..6e89a5df49a4 --- /dev/null +++ b/tools/include/uapi/linux/tc_act/tc_bpf.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include <linux/pkt_cls.h> + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + TCA_ACT_BPF_FD, + TCA_ACT_BPF_NAME, + TCA_ACT_BPF_PAD, + TCA_ACT_BPF_TAG, + TCA_ACT_BPF_ID, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 7bc31c905018..197b40f5b5c6 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 425b480bda75..34d9c3619c96 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -66,7 +66,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -145,14 +145,26 @@ include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +VERSION_SCRIPT := libbpf.map + +GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') +VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_FILE) +CXX_TEST_TARGET = $(OUTPUT)test_libbpf + +ifeq ($(feature-cxx), 1) + CMD_TARGETS += $(CXX_TEST_TARGET) +endif + TARGETS = $(CMD_TARGETS) all: fixdep all_cmd -all_cmd: $(CMD_TARGETS) +all_cmd: $(CMD_TARGETS) check $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ @@ -170,11 +182,27 @@ $(BPF_IN): force elfdep bpfdep $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared $^ -o $@ + $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ + $^ -o $@ $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a + $(QUIET_LINK)$(CXX) $^ -lelf -o $@ + +check: check_abi + +check_abi: $(OUTPUT)libbpf.so + @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ + echo "Warning: Num of global symbols in $(BPF_IN)" \ + "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ + "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ + "Please make sure all LIBBPF_API symbols are" \ + "versioned in $(VERSION_SCRIPT)." >&2; \ + exit 1; \ + fi + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -201,8 +229,8 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ - $(RM) LIBBPF-CFLAGS + $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst new file mode 100644 index 000000000000..056f38310722 --- /dev/null +++ b/tools/lib/bpf/README.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +libbpf API naming convention +============================ + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one-on-one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +Another example: ``bpf_program__load`` is named for corresponding +object, ``bpf_program``, that is separated from other part of the name +by double underscore. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +libbpf ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Links +===== + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 03f9bcc4ef50..3caaa3428774 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -173,9 +173,35 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, -1); } +static void * +alloc_zero_tailing_info(const void *orecord, __u32 cnt, + __u32 actual_rec_size, __u32 expected_rec_size) +{ + __u64 info_len = actual_rec_size * cnt; + void *info, *nrecord; + int i; + + info = malloc(info_len); + if (!info) + return NULL; + + /* zero out bytes kernel does not understand */ + nrecord = info; + for (i = 0; i < cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + return info; +} + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { + void *finfo = NULL, *linfo = NULL; union bpf_attr attr; __u32 name_len; int fd; @@ -196,19 +222,72 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_level = 0; attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; + attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + if (fd >= 0) return fd; + /* After bpf_prog_load, the kernel may modify certain attributes + * to give user space a hint how to deal with loading failure. + * Check to see whether we can make some changes and load again. + */ + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + /* try with corrected func info records */ + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) + goto done; + + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) + goto done; + + attr.line_info = ptr_to_u64(linfo); + attr.line_info_rec_size = load_attr->line_info_rec_size; + } else { + break; + } + + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + + if (fd >= 0) + goto done; + } + + if (!log_buf || !log_buf_sz) + goto done; + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +done: + free(finfo); + free(linfo); + return fd; } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -231,9 +310,9 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level) + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) { union bpf_attr attr; @@ -247,7 +326,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; - attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + attr.prog_flags = prog_flags; return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } @@ -415,6 +494,29 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, return ret; } +int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +{ + union bpf_attr attr; + int ret; + + if (!test_attr->data_out && test_attr->data_size_out > 0) + return -EINVAL; + + bzero(&attr, sizeof(attr)); + attr.test.prog_fd = test_attr->prog_fd; + attr.test.data_in = ptr_to_u64(test_attr->data_in); + attr.test.data_out = ptr_to_u64(test_attr->data_out); + attr.test.data_size_in = test_attr->data_size_in; + attr.test.data_size_out = test_attr->data_size_out; + attr.test.repeat = test_attr->repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; + test_attr->retval = attr.test.retval; + test_attr->duration = attr.test.duration; + return ret; +} + int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 26a51538213c..8f09de482839 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -27,6 +27,10 @@ #include <stdbool.h> #include <stddef.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -74,6 +78,13 @@ struct bpf_load_program_attr { const char *license; __u32 kern_version; __u32 prog_ifindex; + __u32 prog_btf_fd; + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; }; /* Flags to direct loading requirements */ @@ -90,7 +101,7 @@ LIBBPF_API int bpf_load_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz); LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, + size_t insns_cnt, __u32 prog_flags, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz, int log_level); @@ -110,6 +121,25 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); + +struct bpf_prog_test_run_attr { + int prog_fd; + int repeat; + const void *data_in; + __u32 data_size_in; + void *data_out; /* optional */ + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out */ + __u32 retval; /* out: return code of the BPF program */ + __u32 duration; /* out: average per repetition in ns */ +}; + +LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); + +/* + * bpf_prog_test_run does not check that data_out is large enough. Consider + * using bpf_prog_test_run_xattr instead. + */ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); @@ -128,4 +158,9 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_BPF_H */ diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c new file mode 100644 index 000000000000..6978314ea7f6 --- /dev/null +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <string.h> +#include <stdlib.h> +#include <linux/err.h> +#include <linux/bpf.h> +#include "libbpf.h" + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif + +struct bpf_prog_linfo { + void *raw_linfo; + void *raw_jited_linfo; + __u32 *nr_jited_linfo_per_func; + __u32 *jited_linfo_func_idx; + __u32 nr_linfo; + __u32 nr_jited_func; + __u32 rec_size; + __u32 jited_rec_size; +}; + +static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, + const __u64 *ksym_func, const __u32 *ksym_len) +{ + __u32 nr_jited_func, nr_linfo; + const void *raw_jited_linfo; + const __u64 *jited_linfo; + __u64 last_jited_linfo; + /* + * Index to raw_jited_linfo: + * i: Index for searching the next ksym_func + * prev_i: Index to the last found ksym_func + */ + __u32 i, prev_i; + __u32 f; /* Index to ksym_func */ + + raw_jited_linfo = prog_linfo->raw_jited_linfo; + jited_linfo = raw_jited_linfo; + if (ksym_func[0] != *jited_linfo) + goto errout; + + prog_linfo->jited_linfo_func_idx[0] = 0; + nr_jited_func = prog_linfo->nr_jited_func; + nr_linfo = prog_linfo->nr_linfo; + + for (prev_i = 0, i = 1, f = 1; + i < nr_linfo && f < nr_jited_func; + i++) { + raw_jited_linfo += prog_linfo->jited_rec_size; + last_jited_linfo = *jited_linfo; + jited_linfo = raw_jited_linfo; + + if (ksym_func[f] == *jited_linfo) { + prog_linfo->jited_linfo_func_idx[f] = i; + + /* Sanity check */ + if (last_jited_linfo - ksym_func[f - 1] + 1 > + ksym_len[f - 1]) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[f - 1] = + i - prev_i; + prev_i = i; + + /* + * The ksym_func[f] is found in jited_linfo. + * Look for the next one. + */ + f++; + } else if (*jited_linfo <= last_jited_linfo) { + /* Ensure the addr is increasing _within_ a func */ + goto errout; + } + } + + if (f != nr_jited_func) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = + nr_linfo - prev_i; + + return 0; + +errout: + return -EINVAL; +} + +void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) +{ + if (!prog_linfo) + return; + + free(prog_linfo->raw_linfo); + free(prog_linfo->raw_jited_linfo); + free(prog_linfo->nr_jited_linfo_per_func); + free(prog_linfo->jited_linfo_func_idx); + free(prog_linfo); +} + +struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) +{ + struct bpf_prog_linfo *prog_linfo; + __u32 nr_linfo, nr_jited_func; + + nr_linfo = info->nr_line_info; + + if (!nr_linfo) + return NULL; + + /* + * The min size that bpf_prog_linfo has to access for + * searching purpose. + */ + if (info->line_info_rec_size < + offsetof(struct bpf_line_info, file_name_off)) + return NULL; + + prog_linfo = calloc(1, sizeof(*prog_linfo)); + if (!prog_linfo) + return NULL; + + /* Copy xlated line_info */ + prog_linfo->nr_linfo = nr_linfo; + prog_linfo->rec_size = info->line_info_rec_size; + prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); + if (!prog_linfo->raw_linfo) + goto err_free; + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, + nr_linfo * prog_linfo->rec_size); + + nr_jited_func = info->nr_jited_ksyms; + if (!nr_jited_func || + !info->jited_line_info || + info->nr_jited_line_info != nr_linfo || + info->jited_line_info_rec_size < sizeof(__u64) || + info->nr_jited_func_lens != nr_jited_func || + !info->jited_ksyms || + !info->jited_func_lens) + /* Not enough info to provide jited_line_info */ + return prog_linfo; + + /* Copy jited_line_info */ + prog_linfo->nr_jited_func = nr_jited_func; + prog_linfo->jited_rec_size = info->jited_line_info_rec_size; + prog_linfo->raw_jited_linfo = malloc(nr_linfo * + prog_linfo->jited_rec_size); + if (!prog_linfo->raw_jited_linfo) + goto err_free; + memcpy(prog_linfo->raw_jited_linfo, + (void *)(long)info->jited_line_info, + nr_linfo * prog_linfo->jited_rec_size); + + /* Number of jited_line_info per jited func */ + prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->nr_jited_linfo_per_func) + goto err_free; + + /* + * For each jited func, + * the start idx to the "linfo" and "jited_linfo" array, + */ + prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->jited_linfo_func_idx) + goto err_free; + + if (dissect_jited_func(prog_linfo, + (__u64 *)(long)info->jited_ksyms, + (__u32 *)(long)info->jited_func_lens)) + goto err_free; + + return prog_linfo; + +err_free: + bpf_prog_linfo__free(prog_linfo); + return NULL; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip) +{ + __u32 jited_rec_size, rec_size, nr_linfo, start, i; + const void *raw_jited_linfo, *raw_linfo; + const __u64 *jited_linfo; + + if (func_idx >= prog_linfo->nr_jited_func) + return NULL; + + nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; + if (nr_skip >= nr_linfo) + return NULL; + + start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; + jited_rec_size = prog_linfo->jited_rec_size; + raw_jited_linfo = prog_linfo->raw_jited_linfo + + (start * jited_rec_size); + jited_linfo = raw_jited_linfo; + if (addr < *jited_linfo) + return NULL; + + nr_linfo -= nr_skip; + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (start * rec_size); + for (i = 0; i < nr_linfo; i++) { + if (addr < *jited_linfo) + break; + + raw_linfo += rec_size; + raw_jited_linfo += jited_rec_size; + jited_linfo = raw_jited_linfo; + } + + return raw_linfo - rec_size; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip) +{ + const struct bpf_line_info *linfo; + __u32 rec_size, nr_linfo, i; + const void *raw_linfo; + + nr_linfo = prog_linfo->nr_linfo; + if (nr_skip >= nr_linfo) + return NULL; + + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); + linfo = raw_linfo; + if (insn_off < linfo->insn_off) + return NULL; + + nr_linfo -= nr_skip; + for (i = 0; i < nr_linfo; i++) { + if (insn_off < linfo->insn_off) + break; + + raw_linfo += rec_size; + linfo = raw_linfo; + } + + return raw_linfo - rec_size; +} diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 449591aa9900..d682d3b8f7b9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -37,6 +37,48 @@ struct btf { int fd; }; +struct btf_ext_info { + /* + * info points to a deep copy of the individual info section + * (e.g. func_info and line_info) from the .BTF.ext. + * It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + +struct btf_ext { + struct btf_ext_info func_info; + struct btf_ext_info line_info; +}; + +struct btf_ext_info_sec { + __u32 sec_name_off; + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ + __u8 data[0]; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_off; + __u32 type_id; +}; + +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -165,6 +207,10 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) case BTF_KIND_ENUM: next_type += vlen * sizeof(struct btf_enum); break; + case BTF_KIND_FUNC_PROTO: + next_type += vlen * sizeof(struct btf_param); + break; + case BTF_KIND_FUNC: case BTF_KIND_TYPEDEF: case BTF_KIND_PTR: case BTF_KIND_FWD: @@ -393,3 +439,350 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) else return NULL; } + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct bpf_btf_info btf_info = { 0 }; + __u32 len = sizeof(btf_info); + __u32 last_size; + int btf_fd; + void *ptr; + int err; + + err = 0; + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(id); + if (btf_fd < 0) + return 0; + + /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + * let's start with a sane default - 4KiB here - and resize it only if + * bpf_obj_get_info_by_fd() needs a bigger buffer. + */ + btf_info.btf_size = 4096; + last_size = btf_info.btf_size; + ptr = malloc(last_size); + if (!ptr) { + err = -ENOMEM; + goto exit_free; + } + + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + + if (!err && btf_info.btf_size > last_size) { + void *temp_ptr; + + last_size = btf_info.btf_size; + temp_ptr = realloc(ptr, last_size); + if (!temp_ptr) { + err = -ENOMEM; + goto exit_free; + } + ptr = temp_ptr; + bzero(ptr, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + } + + if (err || btf_info.btf_size > last_size) { + err = errno; + goto exit_free; + } + + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + } + +exit_free: + close(btf_fd); + free(ptr); + + return err; +} + +struct btf_ext_sec_copy_param { + __u32 off; + __u32 len; + __u32 min_rec_size; + struct btf_ext_info *ext_info; + const char *desc; +}; + +static int btf_ext_copy_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + struct btf_ext_sec_copy_param *ext_sec, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + const struct btf_ext_info_sec *sinfo; + struct btf_ext_info *ext_info; + __u32 info_left, record_size; + /* The start of the info sec (including the __u32 record_size). */ + const void *info; + + /* data and data_size do not include btf_ext_header from now on */ + data = data + hdr->hdr_len; + data_size -= hdr->hdr_len; + + if (ext_sec->off & 0x03) { + elog(".BTF.ext %s section is not aligned to 4 bytes\n", + ext_sec->desc); + return -EINVAL; + } + + if (data_size < ext_sec->off || + ext_sec->len > data_size - ext_sec->off) { + elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); + return -EINVAL; + } + + info = data + ext_sec->off; + info_left = ext_sec->len; + + /* At least a record size */ + if (info_left < sizeof(__u32)) { + elog(".BTF.ext %s record size not found\n", ext_sec->desc); + return -EINVAL; + } + + /* The record size needs to meet the minimum standard */ + record_size = *(__u32 *)info; + if (record_size < ext_sec->min_rec_size || + record_size & 0x03) { + elog("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); + return -EINVAL; + } + + sinfo = info + sizeof(__u32); + info_left -= sizeof(__u32); + + /* If no records, return failure now so .BTF.ext won't be used. */ + if (!info_left) { + elog("%s section in .BTF.ext has no records", ext_sec->desc); + return -EINVAL; + } + + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u64 total_record_size; + __u32 num_records; + + if (info_left < sec_hdrlen) { + elog("%s section header is not found in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + num_records = sinfo->num_info; + if (num_records == 0) { + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + total_record_size = sec_hdrlen + + (__u64)num_records * record_size; + if (info_left < total_record_size) { + elog("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + info_left -= total_record_size; + sinfo = (void *)sinfo + total_record_size; + } + + ext_info = ext_sec->ext_info; + ext_info->len = ext_sec->len - sizeof(__u32); + ext_info->rec_size = record_size; + ext_info->info = malloc(ext_info->len); + if (!ext_info->info) + return -ENOMEM; + memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); + + return 0; +} + +static int btf_ext_copy_func_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->func_info_off, + .len = hdr->func_info_len, + .min_rec_size = sizeof(struct bpf_func_info_min), + .ext_info = &btf_ext->func_info, + .desc = "func_info" + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + +static int btf_ext_copy_line_info(struct btf_ext *btf_ext, + __u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + struct btf_ext_sec_copy_param param = { + .off = hdr->line_info_off, + .len = hdr->line_info_len, + .min_rec_size = sizeof(struct bpf_line_info_min), + .ext_info = &btf_ext->line_info, + .desc = "line_info", + }; + + return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); +} + +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, + btf_print_fn_t err_log) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + + if (data_size < offsetof(struct btf_ext_header, func_info_off) || + data_size < hdr->hdr_len) { + elog("BTF.ext header not found"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + elog("Invalid BTF.ext magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + elog("Unsupported BTF.ext version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + if (data_size == hdr->hdr_len) { + elog("BTF.ext has no data\n"); + return -EINVAL; + } + + return 0; +} + +void btf_ext__free(struct btf_ext *btf_ext) +{ + if (!btf_ext) + return; + + free(btf_ext->func_info.info); + free(btf_ext->line_info.info); + free(btf_ext); +} + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +{ + struct btf_ext *btf_ext; + int err; + + err = btf_ext_parse_hdr(data, size, err_log); + if (err) + return ERR_PTR(err); + + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return ERR_PTR(-ENOMEM); + + err = btf_ext_copy_func_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + + err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + + return btf_ext; +} + +static int btf_ext_reloc_info(const struct btf *btf, + const struct btf_ext_info *ext_info, + const char *sec_name, __u32 insns_cnt, + void **info, __u32 *cnt) +{ + __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, record_size, existing_len, records_len; + struct btf_ext_info_sec *sinfo; + const char *info_sec_name; + __u64 remain_len; + void *data; + + record_size = ext_info->rec_size; + sinfo = ext_info->info; + remain_len = ext_info->len; + while (remain_len > 0) { + records_len = sinfo->num_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + existing_len = (*cnt) * record_size; + data = realloc(*info, existing_len + records_len); + if (!data) + return -ENOMEM; + + memcpy(data + existing_len, sinfo->data, records_len); + /* adjust insn_off only, the rest data will be passed + * to the kernel. + */ + for (i = 0; i < sinfo->num_info; i++) { + __u32 *insn_off; + + insn_off = data + existing_len + (i * record_size); + *insn_off = *insn_off / sizeof(struct bpf_insn) + + insns_cnt; + } + *info = data; + *cnt += sinfo->num_info; + return 0; + } + + return -ENOENT; +} + +int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, + insns_cnt, func_info, cnt); +} + +int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, + insns_cnt, line_info, cnt); +} + +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->func_info.rec_size; +} + +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->line_info.rec_size; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b77e7080f7e7..b0610dcdae6b 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -6,15 +6,55 @@ #include <linux/types.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif #define BTF_ELF_SEC ".BTF" +#define BTF_EXT_ELF_SEC ".BTF.ext" struct btf; +struct btf_ext; struct btf_type; +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; +}; + typedef int (*btf_print_fn_t)(const char *, ...) __attribute__((format(printf, 1, 2))); @@ -28,5 +68,23 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +void btf_ext__free(struct btf_ext *btf_ext); +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *func_info_len); +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); + +#ifdef __cplusplus +} /* extern "C" */ +#endif #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d6e62e90e8d4..169e347c76f6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9,7 +9,9 @@ * Copyright (C) 2017 Nicira, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdlib.h> #include <stdio.h> #include <stdarg.h> @@ -24,6 +26,7 @@ #include <linux/kernel.h> #include <linux/bpf.h> #include <linux/btf.h> +#include <linux/filter.h> #include <linux/list.h> #include <linux/limits.h> #include <linux/perf_event.h> @@ -114,6 +117,11 @@ void libbpf_set_print(libbpf_print_fn_t warn, # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +struct bpf_capabilities { + /* v4.14: kernel support for program & map names. */ + __u32 name:1; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -124,6 +132,10 @@ struct bpf_program { char *name; int prog_ifindex; char *section_name; + /* section_name with / replaced by _; makes recursive pinning + * in bpf_object__pin_programs easier + */ + char *pin_name; struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; @@ -152,6 +164,16 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; + int btf_fd; + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_cnt; + + struct bpf_capabilities *caps; + + void *line_info; + __u32 line_info_rec_size; + __u32 line_info_cnt; }; struct bpf_map { @@ -159,6 +181,7 @@ struct bpf_map { char *name; size_t offset; int map_ifindex; + int inner_map_fd; struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -208,10 +231,13 @@ struct bpf_object { struct list_head list; struct btf *btf; + struct btf_ext *btf_ext; void *priv; bpf_object_clear_priv_t clear_priv; + struct bpf_capabilities caps; + char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -237,6 +263,10 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); + + zclose(prog->btf_fd); + zfree(&prog->func_info); + zfree(&prog->line_info); } static void bpf_program__exit(struct bpf_program *prog) @@ -253,6 +283,7 @@ static void bpf_program__exit(struct bpf_program *prog) bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->section_name); + zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -261,6 +292,17 @@ static void bpf_program__exit(struct bpf_program *prog) prog->idx = -1; } +static char *__bpf_program__pin_name(struct bpf_program *prog) +{ + char *name, *p; + + name = p = strdup(prog->section_name); + while ((p = strchr(p, '/'))) + *p = '_'; + + return name; +} + static int bpf_program__init(void *data, size_t size, char *section_name, int idx, struct bpf_program *prog) @@ -279,6 +321,13 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, goto errout; } + prog->pin_name = __bpf_program__pin_name(prog); + if (!prog->pin_name) { + pr_warning("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + prog->insns = malloc(size); if (!prog->insns) { pr_warning("failed to alloc insns for prog under section %s\n", @@ -291,7 +340,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->idx = idx; prog->instances.fds = NULL; prog->instances.nr = -1; - prog->type = BPF_PROG_TYPE_KPROBE; + prog->type = BPF_PROG_TYPE_UNSPEC; + prog->btf_fd = -1; return 0; errout: @@ -310,6 +360,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, if (err) return err; + prog.caps = &obj->caps; progs = obj->programs; nr_progs = obj->nr_programs; @@ -562,6 +613,14 @@ static int compare_bpf_map(const void *_a, const void *_b) return a->offset - b->offset; } +static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) +{ + if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS) + return true; + return false; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { @@ -625,13 +684,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) } obj->nr_maps = nr_maps; - /* - * fill all fd with -1 so won't close incorrect - * fd (fd=0 is stdin) when failure (zclose won't close - * negative fd)). - */ - for (i = 0; i < nr_maps; i++) + for (i = 0; i < nr_maps; i++) { + /* + * fill all fd with -1 so won't close incorrect + * fd (fd=0 is stdin) when failure (zclose won't close + * negative fd)). + */ obj->maps[i].fd = -1; + obj->maps[i].inner_map_fd = -1; + } /* * Fill obj->maps using data in "maps" section. @@ -723,6 +784,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Data *btf_ext_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -784,6 +846,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; } + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -845,6 +909,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_ext_data) { + if (!obj->btf) { + pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", + BTF_EXT_ELF_SEC, BTF_ELF_SEC); + } else { + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size, + __pr_debug); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } + } + } if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj, flags); if (err) @@ -1095,6 +1175,52 @@ err_free_new_name: } static int +bpf_object__probe_name(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret; + + /* make sure basic loading works */ + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); + return -errno; + } + close(ret); + + /* now try the same program, but with the name */ + + attr.name = "test"; + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret >= 0) { + obj->caps.name = 1; + close(ret); + } + + return 0; +} + +static int +bpf_object__probe_caps(struct bpf_object *obj) +{ + return bpf_object__probe_name(obj); +} + +static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_create_map_attr create_attr = {}; @@ -1113,7 +1239,8 @@ bpf_object__create_maps(struct bpf_object *obj) continue; } - create_attr.name = map->name; + if (obj->caps.name) + create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; @@ -1123,6 +1250,9 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; + if (bpf_map_type__is_map_in_map(def->type) && + map->inner_map_fd >= 0) + create_attr.inner_map_fd = map->inner_map_fd; if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -1161,12 +1291,89 @@ bpf_object__create_maps(struct bpf_object *obj) } static int +check_btf_ext_reloc_err(struct bpf_program *prog, int err, + void *btf_prog_info, const char *info_name) +{ + if (err != -ENOENT) { + pr_warning("Error in loading %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ + + if (btf_prog_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error + * out. + */ + pr_warning("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* + * Have problem loading the very first info. Ignore + * the rest. + */ + pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); + return 0; +} + +static int +bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, + const char *section_name, __u32 insn_offset) +{ + int err; + + if (!insn_offset || prog->func_info) { + /* + * !insn_offset => main program + * + * For sub prog, the main program's func_info has to + * be loaded first (i.e. prog->func_info != NULL) + */ + err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->func_info, + &prog->func_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->func_info, + "bpf_func_info"); + + prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); + } + + if (!insn_offset || prog->line_info) { + err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->line_info, + &prog->line_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->line_info, + "bpf_line_info"); + + prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); + } + + if (!insn_offset) + prog->btf_fd = btf__fd(obj->btf); + + return 0; +} + +static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) { struct bpf_insn *insn, *new_insn; struct bpf_program *text; size_t new_cnt; + int err; if (relo->type != RELO_CALL) return -LIBBPF_ERRNO__RELOC; @@ -1189,6 +1396,15 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, pr_warning("oom in prog realloc\n"); return -ENOMEM; } + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + text->section_name, + prog->insns_cnt); + if (err) + return err; + } + memcpy(new_insn + prog->insns_cnt, text->insns, text->insns_cnt * sizeof(*insn)); prog->insns = new_insn; @@ -1208,7 +1424,17 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { int i, err; - if (!prog || !prog->reloc_desc) + if (!prog) + return 0; + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) + return err; + } + + if (!prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { @@ -1296,9 +1522,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, - const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, int prog_ifindex) +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1306,15 +1531,22 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, int ret; memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = expected_attach_type; - load_attr.name = name; + load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + if (prog->caps->name) + load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog_ifindex; - + load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + load_attr.func_info = prog->func_info; + load_attr.func_info_rec_size = prog->func_info_rec_size; + load_attr.func_info_cnt = prog->func_info_cnt; + load_attr.line_info = prog->line_info; + load_attr.line_info_rec_size = prog->line_info_rec_size; + load_attr.line_info_cnt = prog->line_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1394,10 +1626,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, prog->insns, prog->insns_cnt, - license, kern_version, &fd, - prog->prog_ifindex); + err = load_program(prog, prog->insns, prog->insns_cnt, + license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1425,11 +1655,9 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, result.new_insn_ptr, + err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, - license, kern_version, &fd, - prog->prog_ifindex); + license, kern_version, &fd); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", @@ -1495,12 +1723,12 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: - return false; case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_KPROBE: case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + return false; + case BPF_PROG_TYPE_KPROBE: default: return true; } @@ -1627,6 +1855,7 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); @@ -1699,6 +1928,34 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warning("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned program '%s'\n", path); + + return 0; +} + static int make_dir(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -1733,6 +1990,11 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return -EINVAL; } + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__pin_instance(prog, path, 0); + } + err = make_dir(path); if (err) return err; @@ -1742,16 +2004,83 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) int len; len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) { + err = -EINVAL; + goto err_unpin; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin; + } + + err = bpf_program__pin_instance(prog, buf, i); + if (err) + goto err_unpin; + } + + return 0; + +err_unpin: + for (i = i - 1; i >= 0; i--) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin_instance(prog, buf, i); + } + + rmdir(path); + + return err; +} + +int bpf_program__unpin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warning("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__unpin_instance(prog, path, 0); + } + + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program__unpin_instance(prog, buf, i); if (err) return err; } + err = rmdir(path); + if (err) + return -errno; + return 0; } @@ -1776,12 +2105,33 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } pr_debug("pinned map '%s'\n", path); + return 0; } -int bpf_object__pin(struct bpf_object *obj, const char *path) +int bpf_map__unpin(struct bpf_map *map, const char *path) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (map == NULL) { + pr_warning("invalid map pointer\n"); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned map '%s'\n", path); + + return 0; +} + +int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { - struct bpf_program *prog; struct bpf_map *map; int err; @@ -1803,28 +2153,142 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + + err = bpf_map__pin(map, buf); + if (err) + goto err_unpin_maps; + } + + return 0; + +err_unpin_maps: + while ((map = bpf_map__prev(map, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_map__unpin(map, buf); + } + + return err; +} + +int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) +{ + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + + bpf_map__for_each(map, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_map__pin(map, buf); + err = bpf_map__unpin(map, buf); if (err) return err; } + return 0; +} + +int bpf_object__pin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warning("object not yet loaded; load it first\n"); + return -ENOENT; + } + + err = make_dir(path); + if (err) + return err; + bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; int len; len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->section_name); + prog->pin_name); + if (len < 0) { + err = -EINVAL; + goto err_unpin_programs; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_programs; + } + + err = bpf_program__pin(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_program__prev(prog, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + +int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin(prog, buf); + err = bpf_program__unpin(prog, buf); if (err) return err; } @@ -1832,6 +2296,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } +int bpf_object__pin(struct bpf_object *obj, const char *path) +{ + int err; + + err = bpf_object__pin_maps(obj, path); + if (err) + return err; + + err = bpf_object__pin_programs(obj, path); + if (err) { + bpf_object__unpin_maps(obj, path); + return err; + } + + return 0; +} + void bpf_object__close(struct bpf_object *obj) { size_t i; @@ -1845,6 +2326,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); + btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); @@ -1918,23 +2400,26 @@ void *bpf_object__priv(struct bpf_object *obj) } static struct bpf_program * -__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward) { - size_t idx; + size_t nr_programs = obj->nr_programs; + ssize_t idx; - if (!obj->programs) + if (!nr_programs) return NULL; - /* First handler */ - if (prev == NULL) - return &obj->programs[0]; - if (prev->obj != obj) { + if (!p) + /* Iter from the beginning */ + return forward ? &obj->programs[0] : + &obj->programs[nr_programs - 1]; + + if (p->obj != obj) { pr_warning("error: program handler doesn't match object\n"); return NULL; } - idx = (prev - obj->programs) + 1; - if (idx >= obj->nr_programs) + idx = (p - obj->programs) + (forward ? 1 : -1); + if (idx >= obj->nr_programs || idx < 0) return NULL; return &obj->programs[idx]; } @@ -1945,7 +2430,19 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) struct bpf_program *prog = prev; do { - prog = __bpf_program__next(prog, obj); + prog = __bpf_program__iter(prog, obj, true); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +struct bpf_program * +bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) +{ + struct bpf_program *prog = next; + + do { + prog = __bpf_program__iter(prog, obj, false); } while (prog && bpf_program__is_function_storage(prog, obj)); return prog; @@ -2272,10 +2769,24 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) map->map_ifindex = ifindex; } -struct bpf_map * -bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) { - size_t idx; + if (!bpf_map_type__is_map_in_map(map->def.type)) { + pr_warning("error: unsupported map type\n"); + return -EINVAL; + } + if (map->inner_map_fd != -1) { + pr_warning("error: inner_map_fd already specified\n"); + return -EINVAL; + } + map->inner_map_fd = fd; + return 0; +} + +static struct bpf_map * +__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) +{ + ssize_t idx; struct bpf_map *s, *e; if (!obj || !obj->maps) @@ -2284,22 +2795,40 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) s = obj->maps; e = obj->maps + obj->nr_maps; - if (prev == NULL) - return s; - - if ((prev < s) || (prev >= e)) { + if ((m < s) || (m >= e)) { pr_warning("error in %s: map handler doesn't belong to object\n", __func__); return NULL; } - idx = (prev - obj->maps) + 1; - if (idx >= obj->nr_maps) + idx = (m - obj->maps) + i; + if (idx >= obj->nr_maps || idx < 0) return NULL; return &obj->maps[idx]; } struct bpf_map * +bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +{ + if (prev == NULL) + return obj->maps; + + return __bpf_map__iter(prev, obj, 1); +} + +struct bpf_map * +bpf_map__prev(struct bpf_map *next, struct bpf_object *obj) +{ + if (next == NULL) { + if (!obj->nr_maps) + return NULL; + return obj->maps + obj->nr_maps - 1; + } + + return __bpf_map__iter(next, obj, -1); +} + +struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) { struct bpf_map *pos; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1f3468dad8b2..5f68d7b75215 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,10 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -71,6 +75,13 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); +LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, + const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); LIBBPF_API void bpf_object__close(struct bpf_object *object); @@ -112,6 +123,9 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, (pos) != NULL; \ (pos) = bpf_program__next((pos), (obj))) +LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, + struct bpf_object *obj); + typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -131,7 +145,11 @@ LIBBPF_API int bpf_program__fd(struct bpf_program *prog); LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, + const char *path, + int instance); LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; @@ -260,6 +278,9 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) +LIBBPF_API struct bpf_map * +bpf_map__prev(struct bpf_map *map, struct bpf_object *obj); + LIBBPF_API int bpf_map__fd(struct bpf_map *map); LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); LIBBPF_API const char *bpf_map__name(struct bpf_map *map); @@ -274,6 +295,9 @@ LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); + +LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API long libbpf_get_error(const void *ptr); @@ -317,4 +341,22 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + +struct bpf_prog_linfo; +struct bpf_prog_info; + +LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); +LIBBPF_API struct bpf_prog_linfo * +bpf_prog_linfo__new(const struct bpf_prog_info *info); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_LIBBPF_H */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map new file mode 100644 index 000000000000..cd02cd4e2cc3 --- /dev/null +++ b/tools/lib/bpf/libbpf.map @@ -0,0 +1,126 @@ +LIBBPF_0.0.1 { + global: + bpf_btf_get_fd_by_id; + bpf_create_map; + bpf_create_map_in_map; + bpf_create_map_in_map_node; + bpf_create_map_name; + bpf_create_map_node; + bpf_create_map_xattr; + bpf_load_btf; + bpf_load_program; + bpf_load_program_xattr; + bpf_map__btf_key_type_id; + bpf_map__btf_value_type_id; + bpf_map__def; + bpf_map__fd; + bpf_map__is_offload_neutral; + bpf_map__name; + bpf_map__next; + bpf_map__pin; + bpf_map__prev; + bpf_map__priv; + bpf_map__reuse_fd; + bpf_map__set_ifindex; + bpf_map__set_inner_map_fd; + bpf_map__set_priv; + bpf_map__unpin; + bpf_map_delete_elem; + bpf_map_get_fd_by_id; + bpf_map_get_next_id; + bpf_map_get_next_key; + bpf_map_lookup_and_delete_elem; + bpf_map_lookup_elem; + bpf_map_update_elem; + bpf_obj_get; + bpf_obj_get_info_by_fd; + bpf_obj_pin; + bpf_object__btf_fd; + bpf_object__close; + bpf_object__find_map_by_name; + bpf_object__find_map_by_offset; + bpf_object__find_program_by_title; + bpf_object__kversion; + bpf_object__load; + bpf_object__name; + bpf_object__next; + bpf_object__open; + bpf_object__open_buffer; + bpf_object__open_xattr; + bpf_object__pin; + bpf_object__pin_maps; + bpf_object__pin_programs; + bpf_object__priv; + bpf_object__set_priv; + bpf_object__unload; + bpf_object__unpin_maps; + bpf_object__unpin_programs; + bpf_perf_event_read_simple; + bpf_prog_attach; + bpf_prog_detach; + bpf_prog_detach2; + bpf_prog_get_fd_by_id; + bpf_prog_get_next_id; + bpf_prog_load; + bpf_prog_load_xattr; + bpf_prog_query; + bpf_prog_test_run; + bpf_prog_test_run_xattr; + bpf_program__fd; + bpf_program__is_kprobe; + bpf_program__is_perf_event; + bpf_program__is_raw_tracepoint; + bpf_program__is_sched_act; + bpf_program__is_sched_cls; + bpf_program__is_socket_filter; + bpf_program__is_tracepoint; + bpf_program__is_xdp; + bpf_program__load; + bpf_program__next; + bpf_program__nth_fd; + bpf_program__pin; + bpf_program__pin_instance; + bpf_program__prev; + bpf_program__priv; + bpf_program__set_expected_attach_type; + bpf_program__set_ifindex; + bpf_program__set_kprobe; + bpf_program__set_perf_event; + bpf_program__set_prep; + bpf_program__set_priv; + bpf_program__set_raw_tracepoint; + bpf_program__set_sched_act; + bpf_program__set_sched_cls; + bpf_program__set_socket_filter; + bpf_program__set_tracepoint; + bpf_program__set_type; + bpf_program__set_xdp; + bpf_program__title; + bpf_program__unload; + bpf_program__unpin; + bpf_program__unpin_instance; + bpf_prog_linfo__free; + bpf_prog_linfo__new; + bpf_prog_linfo__lfind_addr_func; + bpf_prog_linfo__lfind; + bpf_raw_tracepoint_open; + bpf_set_link_xdp_fd; + bpf_task_fd_query; + bpf_verify_program; + btf__fd; + btf__find_by_name; + btf__free; + btf__get_from_id; + btf__name_by_offset; + btf__new; + btf__resolve_size; + btf__resolve_type; + btf__type_by_id; + libbpf_attach_type_by_name; + libbpf_get_error; + libbpf_prog_type_by_name; + libbpf_set_print; + libbpf_strerror; + local: + *; +}; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d83b17f8435c..4343e40588c6 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 Nicira, Inc. */ +#undef _GNU_SOURCE #include <stdio.h> #include <string.h> diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp new file mode 100644 index 000000000000..abf3fc25c9fa --- /dev/null +++ b/tools/lib/bpf/test_libbpf.cpp @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" + +/* do nothing, just make sure we can link successfully */ + +int main(int argc, char *argv[]) +{ + /* libbpf.h */ + libbpf_set_print(NULL, NULL, NULL); + + /* bpf.h */ + bpf_prog_get_fd_by_id(0); + + /* btf.h */ + btf__new(NULL, 0, NULL); +} diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 6dbb9fae0f9d..b8f3cca8e58b 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -31,6 +31,8 @@ #include "elf.h" #include "warn.h" +#define MAX_NAME_LEN 128 + struct section *find_section_by_name(struct elf *elf, const char *name) { struct section *sec; @@ -298,6 +300,8 @@ static int read_symbols(struct elf *elf) /* Create parent/child links for any cold subfunctions */ list_for_each_entry(sec, &elf->sections, list) { list_for_each_entry(sym, &sec->symbol_list, list) { + char pname[MAX_NAME_LEN + 1]; + size_t pnamelen; if (sym->type != STT_FUNC) continue; sym->pfunc = sym->cfunc = sym; @@ -305,14 +309,21 @@ static int read_symbols(struct elf *elf) if (!coldstr) continue; - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; + pnamelen = coldstr - sym->name; + if (pnamelen > MAX_NAME_LEN) { + WARN("%s(): parent function name exceeds maximum length of %d characters", + sym->name, MAX_NAME_LEN); + return -1; + } + + strncpy(pname, sym->name, pnamelen); + pname[pnamelen] = '\0'; + pfunc = find_symbol_by_name(elf, pname); if (!pfunc) { WARN("%s(): can't find parent function", sym->name); - goto err; + return -1; } sym->pfunc = pfunc; diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e30d20fb482d..a0e8c23f9125 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -299,6 +299,11 @@ ifndef NO_BIONIC endif endif +ifeq ($(feature-get_current_dir_name), 1) + CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME +endif + + ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 37940665f736..efd0157b9d22 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -9,7 +9,7 @@ size=112 config=0 sample_period=* sample_type=263 -read_format=0 +read_format=0|4 disabled=1 inherit=1 pinned=0 diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 5d2a7fd8d407..eae59ad15ce3 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -31,6 +31,7 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size) "TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", "TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER", + "TIOCGISO7816", "TIOCSISO7816", [_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index ecd9f9ceda77..b7bf201fe8a8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ libperf-y += evlist.o libperf-y += evsel.o libperf-y += evsel_fprintf.o libperf-y += find_bit.o +libperf-y += get_current_dir_name.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d37bb1566cd9..dbc0466db368 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1092,7 +1092,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } - if (evsel->own_cpus) + if (evsel->own_cpus || evsel->unit) evsel->attr.read_format |= PERF_FORMAT_ID; /* diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c new file mode 100644 index 000000000000..267aa609a582 --- /dev/null +++ b/tools/perf/util/get_current_dir_name.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +// +#ifndef HAVE_GET_CURRENT_DIR_NAME +#include "util.h" +#include <unistd.h> +#include <stdlib.h> +#include <stdlib.h> + +/* Android's 'bionic' library, for one, doesn't have this */ + +char *get_current_dir_name(void) +{ + char pwd[PATH_MAX]; + + return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd); +} +#endif // HAVE_GET_CURRENT_DIR_NAME diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index cf8bd123cf73..aed170bd4384 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <asm/bug.h> struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -186,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, char curpath[PATH_MAX]; int oldns = -1; int newns = -1; + char *oldcwd = NULL; if (nc == NULL) return; @@ -199,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) return; + oldcwd = get_current_dir_name(); + if (!oldcwd) + return; + oldns = open(curpath, O_RDONLY); if (oldns < 0) - return; + goto errout; newns = open(nsi->mntns_path, O_RDONLY); if (newns < 0) @@ -210,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (setns(newns, CLONE_NEWNS) < 0) goto errout; + nc->oldcwd = oldcwd; nc->oldns = oldns; nc->newns = newns; return; errout: + free(oldcwd); if (oldns > -1) close(oldns); if (newns > -1) @@ -223,11 +231,16 @@ errout: void nsinfo__mountns_exit(struct nscookie *nc) { - if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd) return; setns(nc->oldns, CLONE_NEWNS); + if (nc->oldcwd) { + WARN_ON_ONCE(chdir(nc->oldcwd)); + zfree(&nc->oldcwd); + } + if (nc->oldns > -1) { close(nc->oldns); nc->oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index cae1a9a39722..d5f46c09ea31 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -38,6 +38,7 @@ struct nsinfo { struct nscookie { int oldns; int newns; + char *oldcwd; }; int nsinfo__init(struct nsinfo *nsi); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 14508ee7707a..ece040b799f6 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -59,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); +#ifndef HAVE_GET_CURRENT_DIR_NAME +char *get_current_dir_name(void); +#endif + #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 1dd5f4fcffd5..db66a952c173 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -129,7 +129,7 @@ WARNINGS += $(call cc-supports,-Wno-pointer-sign) WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) WARNINGS += -Wshadow -CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \ +override CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \ -DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE UTIL_OBJS = utils/helpers/amd.o utils/helpers/msr.o \ @@ -156,12 +156,12 @@ LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS)) -CFLAGS += -pipe +override CFLAGS += -pipe ifeq ($(strip $(NLS)),true) INSTALL_NLS += install-gmo COMPILE_NLS += create-gmo - CFLAGS += -DNLS + override CFLAGS += -DNLS endif ifeq ($(strip $(CPUFREQ_BENCH)),true) @@ -175,7 +175,7 @@ ifeq ($(strip $(STATIC)),true) UTIL_SRC += $(LIB_SRC) endif -CFLAGS += $(WARNINGS) +override CFLAGS += $(WARNINGS) ifeq ($(strip $(V)),false) QUIET=@ @@ -188,10 +188,10 @@ export QUIET ECHO # if DEBUG is enabled, then we do not strip or optimize ifeq ($(strip $(DEBUG)),true) - CFLAGS += -O1 -g -DDEBUG + override CFLAGS += -O1 -g -DDEBUG STRIPCMD = /bin/true -Since_we_are_debugging else - CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer + override CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment endif diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index d79ab161cc75..f68b4bc55273 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -9,7 +9,7 @@ endif ifeq ($(strip $(STATIC)),true) LIBS = -L../ -L$(OUTPUT) -lm OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \ - $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/sysfs.o + $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o else LIBS = -L../ -L$(OUTPUT) -lm -lcpupower OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index 59af84b8ef45..b1b6c43644e7 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -13,10 +13,10 @@ INSTALL = /usr/bin/install default: all $(OUTPUT)centrino-decode: ../i386/centrino-decode.c - $(CC) $(CFLAGS) -o $@ $< + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $< $(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c - $(CC) $(CFLAGS) -o $@ $< + $(CC) $(CFLAGS) -o $@ $(LDFLAGS) $< all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 1b993fe1ce23..0c0f3e3f0d80 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -28,7 +28,7 @@ static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname, snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s", cpu, fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } /* helper function to write a new value to a /sys file */ diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 9bd4c7655fdb..852d25462388 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -319,7 +319,7 @@ static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf, snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname); - return sysfs_read_file(path, buf, buflen); + return cpupower_read_sysfs(path, buf, buflen); } diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c index 9c395ec924de..9711d628b0f4 100644 --- a/tools/power/cpupower/lib/cpupower.c +++ b/tools/power/cpupower/lib/cpupower.c @@ -15,7 +15,7 @@ #include "cpupower.h" #include "cpupower_intern.h" -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen) { int fd; ssize_t numread; @@ -95,7 +95,7 @@ static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *re snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s", cpu, fname); - if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) + if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0) return -1; *result = strtol(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h index 92affdfbe417..4887c76d23f8 100644 --- a/tools/power/cpupower/lib/cpupower_intern.h +++ b/tools/power/cpupower/lib/cpupower_intern.h @@ -3,4 +3,4 @@ #define MAX_LINE_LEN 4096 #define SYSFS_PATH_MAX 255 -unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9527d47a1070..6c16ac36d482 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -15,6 +15,7 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/libnvdimm.h> +#include <linux/genalloc.h> #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/module.h> @@ -140,8 +141,8 @@ static u32 handle[] = { [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; -static unsigned long dimm_fail_cmd_flags[NUM_DCR]; -static int dimm_fail_cmd_code[NUM_DCR]; +static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)]; +static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; static const struct nd_intel_smart smart_def = { .flags = ND_INTEL_SMART_HEALTH_VALID @@ -205,7 +206,7 @@ struct nfit_test { unsigned long deadline; spinlock_t lock; } ars_state; - struct device *dimm_dev[NUM_DCR]; + struct device *dimm_dev[ARRAY_SIZE(handle)]; struct nd_intel_smart *smart; struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; @@ -215,6 +216,8 @@ struct nfit_test { static struct workqueue_struct *nfit_wq; +static struct gen_pool *nfit_pool; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1132,6 +1135,9 @@ static void release_nfit_res(void *data) list_del(&nfit_res->list); spin_unlock(&nfit_test_lock); + if (resource_size(&nfit_res->res) >= DIMM_SIZE) + gen_pool_free(nfit_pool, nfit_res->res.start, + resource_size(&nfit_res->res)); vfree(nfit_res->buf); kfree(nfit_res); } @@ -1144,7 +1150,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, GFP_KERNEL); int rc; - if (!buf || !nfit_res) + if (!buf || !nfit_res || !*dma) goto err; rc = devm_add_action(dev, release_nfit_res, nfit_res); if (rc) @@ -1164,6 +1170,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, return nfit_res->buf; err: + if (*dma && size >= DIMM_SIZE) + gen_pool_free(nfit_pool, *dma, size); if (buf) vfree(buf); kfree(nfit_res); @@ -1172,9 +1180,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) { + struct genpool_data_align data = { + .align = SZ_128M, + }; void *buf = vmalloc(size); - *dma = (unsigned long) buf; + if (size >= DIMM_SIZE) + *dma = gen_pool_alloc_algo(nfit_pool, size, + gen_pool_first_fit_align, &data); + else + *dma = (unsigned long) buf; return __test_alloc(t, size, dma, buf); } @@ -2680,7 +2695,7 @@ static int nfit_test_probe(struct platform_device *pdev) u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle; int i; - for (i = 0; i < NUM_DCR; i++) + for (i = 0; i < ARRAY_SIZE(handle); i++) if (nfit_handle == handle[i]) dev_set_drvdata(nfit_test->dimm_dev[i], nfit_mem); @@ -2839,6 +2854,17 @@ static __init int nfit_test_init(void) goto err_register; } + nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!nfit_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + for (i = 0; i < NUM_NFITS; i++) { struct nfit_test *nfit_test; struct platform_device *pdev; @@ -2894,6 +2920,9 @@ static __init int nfit_test_init(void) return 0; err_register: + if (nfit_pool) + gen_pool_destroy(nfit_pool); + destroy_workqueue(nfit_wq); for (i = 0; i < NUM_NFITS; i++) if (instances[i]) @@ -2917,6 +2946,8 @@ static __exit void nfit_test_exit(void) platform_driver_unregister(&nfit_test_driver); nfit_test_teardown(); + gen_pool_destroy(nfit_pool); + for (i = 0; i < NUM_NFITS; i++) put_device(&instances[i]->pdev.dev); class_destroy(nfit_test_dimm); diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index acf1afa01c5b..397d6b612502 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -7,6 +7,7 @@ LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ + regression4.o \ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o ifndef SHIFT diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 77a44c54998f..7a22d6e3732e 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -308,6 +308,7 @@ int main(int argc, char **argv) regression1_test(); regression2_test(); regression3_test(); + regression4_test(); iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h index 3c8a1584e9ee..135145af18b7 100644 --- a/tools/testing/radix-tree/regression.h +++ b/tools/testing/radix-tree/regression.h @@ -5,5 +5,6 @@ void regression1_test(void); void regression2_test(void); void regression3_test(void); +void regression4_test(void); #endif diff --git a/tools/testing/radix-tree/regression4.c b/tools/testing/radix-tree/regression4.c new file mode 100644 index 000000000000..cf4e5aba6b08 --- /dev/null +++ b/tools/testing/radix-tree/regression4.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/radix-tree.h> +#include <linux/rcupdate.h> +#include <stdlib.h> +#include <pthread.h> +#include <stdio.h> +#include <assert.h> + +#include "regression.h" + +static pthread_barrier_t worker_barrier; +static int obj0, obj1; +static RADIX_TREE(mt_tree, GFP_KERNEL); + +static void *reader_fn(void *arg) +{ + int i; + void *entry; + + rcu_register_thread(); + pthread_barrier_wait(&worker_barrier); + + for (i = 0; i < 1000000; i++) { + rcu_read_lock(); + entry = radix_tree_lookup(&mt_tree, 0); + rcu_read_unlock(); + if (entry != &obj0) { + printf("iteration %d bad entry = %p\n", i, entry); + abort(); + } + } + + rcu_unregister_thread(); + + return NULL; +} + +static void *writer_fn(void *arg) +{ + int i; + + rcu_register_thread(); + pthread_barrier_wait(&worker_barrier); + + for (i = 0; i < 1000000; i++) { + radix_tree_insert(&mt_tree, 1, &obj1); + radix_tree_delete(&mt_tree, 1); + } + + rcu_unregister_thread(); + + return NULL; +} + +void regression4_test(void) +{ + pthread_t reader, writer; + + printv(1, "regression test 4 starting\n"); + + radix_tree_insert(&mt_tree, 0, &obj0); + pthread_barrier_init(&worker_barrier, NULL, 2); + + if (pthread_create(&reader, NULL, reader_fn, NULL) || + pthread_create(&writer, NULL, writer_fn, NULL)) { + perror("pthread_create"); + exit(1); + } + + if (pthread_join(reader, NULL) || pthread_join(writer, NULL)) { + perror("pthread_join"); + exit(1); + } + + printv(1, "regression test 4 passed\n"); +} diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f1fe492c8e17..24b9934fb269 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -24,6 +24,8 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += netfilter +TARGETS += networking/timestamping TARGETS += nsfs TARGETS += powerpc TARGETS += proc diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1b799e30c06d..4a9785043a39 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -27,3 +27,4 @@ test_flow_dissector flow_dissector_load test_netcnt test_section_names +test_tcpnotify_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4a54236475ae..73aa6d8f4a2f 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -24,12 +24,13 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt + test_netcnt test_tcpnotify_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + test_tcpnotify_kern.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ @@ -38,7 +39,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ - xdp_dummy.o + xdp_dummy.o test_map_in_map.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -75,6 +76,7 @@ $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c +$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c @@ -125,7 +127,14 @@ $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) +ifneq ($(BTF_LLVM_PROBE),) + CLANG_FLAGS += -g +else ifneq ($(BTF_LLC_PROBE),) ifneq ($(BTF_PAHOLE_PROBE),) ifneq ($(BTF_OBJCOPY_PROBE),) @@ -135,6 +144,17 @@ ifneq ($(BTF_OBJCOPY_PROBE),) endif endif endif +endif + +# Have one program compiled without "-target bpf" to test whether libbpf loads +# it successfully +$(OUTPUT)/test_xdp.o: test_xdp.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c index 107350a7821d..284660f5aa95 100644 --- a/tools/testing/selftests/bpf/bpf_flow.c +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -70,18 +70,18 @@ static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; - __u16 nhoff = skb->flow_keys->nhoff; + __u16 thoff = skb->flow_keys->thoff; __u8 *hdr; /* Verifies this variable offset does not overflow */ - if (nhoff > (USHRT_MAX - hdr_size)) + if (thoff > (USHRT_MAX - hdr_size)) return NULL; - hdr = data + nhoff; + hdr = data + thoff; if (hdr + hdr_size <= data_end) return hdr; - if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) + if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) return NULL; return buffer; @@ -116,7 +116,7 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) return BPF_DROP; } -SEC("dissect") +SEC("flow_dissector") int _dissect(struct __sk_buff *skb) { if (!skb->vlan_present) @@ -158,13 +158,13 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) /* Only inspect standard GRE packets with version 0 */ return BPF_OK; - keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ if (GRE_IS_CSUM(gre->flags)) - keys->nhoff += 4; /* Step over chksum and Padding */ + keys->thoff += 4; /* Step over chksum and Padding */ if (GRE_IS_KEY(gre->flags)) - keys->nhoff += 4; /* Step over key */ + keys->thoff += 4; /* Step over key */ if (GRE_IS_SEQ(gre->flags)) - keys->nhoff += 4; /* Step over sequence number */ + keys->thoff += 4; /* Step over sequence number */ keys->is_encap = true; @@ -174,7 +174,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if (!eth) return BPF_DROP; - keys->nhoff += sizeof(*eth); + keys->thoff += sizeof(*eth); return parse_eth_proto(skb, eth->h_proto); } else { @@ -191,7 +191,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if ((__u8 *)tcp + (tcp->doff << 2) > data_end) return BPF_DROP; - keys->thoff = keys->nhoff; keys->sport = tcp->source; keys->dport = tcp->dest; return BPF_OK; @@ -201,7 +200,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) if (!udp) return BPF_DROP; - keys->thoff = keys->nhoff; keys->sport = udp->source; keys->dport = udp->dest; return BPF_OK; @@ -252,8 +250,8 @@ PROG(IP)(struct __sk_buff *skb) keys->ipv4_src = iph->saddr; keys->ipv4_dst = iph->daddr; - keys->nhoff += iph->ihl << 2; - if (data + keys->nhoff > data_end) + keys->thoff += iph->ihl << 2; + if (data + keys->thoff > data_end) return BPF_DROP; if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { @@ -285,7 +283,7 @@ PROG(IPV6)(struct __sk_buff *skb) keys->addr_proto = ETH_P_IPV6; memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); - keys->nhoff += sizeof(struct ipv6hdr); + keys->thoff += sizeof(struct ipv6hdr); return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -301,7 +299,7 @@ PROG(IPV6OP)(struct __sk_buff *skb) /* hlen is in 8-octets and does not include the first 8 bytes * of the header */ - skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; + skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -315,7 +313,7 @@ PROG(IPV6FR)(struct __sk_buff *skb) if (!fragh) return BPF_DROP; - keys->nhoff += sizeof(*fragh); + keys->thoff += sizeof(*fragh); keys->is_frag = true; if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) keys->is_first_frag = true; @@ -341,7 +339,7 @@ PROG(VLAN)(struct __sk_buff *skb) __be16 proto; /* Peek back to see if single or double-tagging */ - if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, + if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, sizeof(proto))) return BPF_DROP; @@ -354,14 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb) if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) return BPF_DROP; - keys->nhoff += sizeof(*vlan); + keys->thoff += sizeof(*vlan); } vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) return BPF_DROP; - keys->nhoff += sizeof(*vlan); + keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 686e57ce40f4..6c77cf7bedce 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_push_data; +static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = + (void *) BPF_FUNC_msg_pop_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = @@ -154,12 +156,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_tcp; static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, - int size, unsigned int netns_id, + int size, unsigned long long netns_id, unsigned long long flags) = (void *) BPF_FUNC_sk_lookup_udp; static int (*bpf_sk_release)(struct bpf_sock *sk) = @@ -168,6 +170,8 @@ static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = (void *) BPF_FUNC_skb_vlan_push; static int (*bpf_skb_vlan_pop)(void *ctx) = (void *) BPF_FUNC_skb_vlan_pop; +static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = + (void *) BPF_FUNC_rc_pointer_rel; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7f90d3645af8..37f947ec44ed 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,3 +22,4 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y +CONFIG_FTRACE_SYSCALLS=y diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c index 5a88a681d2ab..1fd244d35ba9 100644 --- a/tools/testing/selftests/bpf/connect4_prog.c +++ b/tools/testing/selftests/bpf/connect4_prog.c @@ -21,23 +21,50 @@ int _version SEC("version") = 1; SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); + memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport)); + + tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); + tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); + + if (!sk) + return 0; + + if (sk->src_ip4 != tuple.ipv4.daddr || + sk->src_port != DST_REWRITE_PORT4) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - ///* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin_family = AF_INET; - sa.sin_port = bpf_htons(0); - sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c index 8ea3f7d12dee..26397ab7b3c7 100644 --- a/tools/testing/selftests/bpf/connect6_prog.c +++ b/tools/testing/selftests/bpf/connect6_prog.c @@ -29,7 +29,43 @@ int _version SEC("version") = 1; SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in6 sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr)); + memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport)); + + tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0); + tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1); + tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2); + tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3); + + tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); + + if (!sk) + return 0; + + if (sk->src_ip6[0] != tuple.ipv6.daddr[0] || + sk->src_ip6[1] != tuple.ipv6.daddr[1] || + sk->src_ip6[2] != tuple.ipv6.daddr[2] || + sk->src_ip6[3] != tuple.ipv6.daddr[3] || + sk->src_port != DST_REWRITE_PORT6) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); @@ -39,21 +75,19 @@ int connect_v6_prog(struct bpf_sock_addr *ctx) ctx->user_port = bpf_htons(DST_REWRITE_PORT6); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - /* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin6_family = AF_INET6; - sa.sin6_port = bpf_htons(0); + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(0); - sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); - sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); - sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); - sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); + sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); + sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); + sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c index 1198abca1360..9f741e69cebe 100644 --- a/tools/testing/selftests/bpf/netcnt_prog.c +++ b/tools/testing/selftests/bpf/netcnt_prog.c @@ -16,12 +16,18 @@ struct bpf_map_def SEC("maps") percpu_netcnt = { .value_size = sizeof(struct percpu_net_cnt), }; +BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key, + struct percpu_net_cnt); + struct bpf_map_def SEC("maps") netcnt = { .type = BPF_MAP_TYPE_CGROUP_STORAGE, .key_size = sizeof(struct bpf_cgroup_storage_key), .value_size = sizeof(struct net_cnt), }; +BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key, + struct net_cnt); + SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) { diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 5f377ec53f2f..3c789d03b629 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -620,8 +620,8 @@ static int do_test_single(struct bpf_align_test *test) prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, 1, "GPL", 0, - bpf_vlog, sizeof(bpf_vlog), 2); + prog, prog_len, BPF_F_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index f42b3396d622..8bcd38010582 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5,6 +5,8 @@ #include <linux/btf.h> #include <linux/err.h> #include <linux/kernel.h> +#include <linux/filter.h> +#include <linux/unistd.h> #include <bpf/bpf.h> #include <sys/resource.h> #include <libelf.h> @@ -22,6 +24,9 @@ #include "bpf_rlimit.h" #include "bpf_util.h" +#define MAX_INSNS 512 +#define MAX_SUBPROGS 16 + static uint32_t pass_cnt; static uint32_t error_cnt; static uint32_t skip_cnt; @@ -60,8 +65,8 @@ static int __base_pr(const char *format, ...) return err; } -#define BTF_INFO_ENC(kind, root, vlen) \ - ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) \ (name), (info), (size_or_type) @@ -81,28 +86,44 @@ static int __base_pr(const char *format, ...) #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ + ((bitfield_size) << 24 | (bits_offset)) #define BTF_TYPEDEF_ENC(name, type) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) -#define BTF_PTR_ENC(name, type) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) +#define BTF_PTR_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_CONST_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) + +#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) + +#define BTF_FUNC_PROTO_ARG_ENC(name, type) \ + (name), (type) + +#define BTF_FUNC_ENC(name, func_proto) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f -#define MAX_NR_RAW_TYPES 1024 +#define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 static struct args { unsigned int raw_test_num; unsigned int file_test_num; unsigned int get_info_test_num; + unsigned int info_raw_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; + bool info_raw_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -118,7 +139,7 @@ struct btf_raw_test { const char *str_sec; const char *map_name; const char *err_str; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; enum bpf_map_type map_type; __u32 key_size; @@ -137,6 +158,9 @@ struct btf_raw_test { int str_len_delta; }; +#define BTF_STR_SEC(str) \ + .str_sec = str, .str_sec_size = sizeof(str) + static struct btf_raw_test raw_tests[] = { /* enum E { * E0, @@ -432,11 +456,11 @@ static struct btf_raw_test raw_tests[] = { /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* typedef const void * const_void_ptr */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* struct A { */ /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* struct A { */ /* [5] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), /* const_void_ptr m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 0), + BTF_MEMBER_ENC(NAME_TBD, 4, 0), /* } */ BTF_END_RAW, }, @@ -494,10 +518,10 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* const_void_ptr[4] */ /* [5] */ - BTF_TYPE_ARRAY_ENC(3, 1, 4), + /* typedef const void * const_void_ptr */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* const_void_ptr[4] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */ BTF_END_RAW, }, .str_sec = "\0const_void_ptr", @@ -1293,6 +1317,367 @@ static struct btf_raw_test raw_tests[] = { }, { + .descr = "typedef (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(0, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "typedef (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!int", + .str_sec_size = sizeof("\0__!int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "ptr type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "volatile type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "const type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "restrict type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!skb", + .str_sec_size = sizeof("\0__!skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "array type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct member (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B*", + .str_sec_size = sizeof("\0A\0B*"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B", + .str_sec_size = sizeof("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "enum type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!", + .str_sec_size = sizeof("\0A!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, +{ .descr = "arraymap invalid btf key (a bit field)", .raw_types = { /* int */ /* [1] */ @@ -1374,6 +1759,954 @@ static struct btf_raw_test raw_tests[] = { .map_create_err = true, }, +{ + .descr = "func proto (int (*)(int, unsigned int))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* int (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int, ...) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg with name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b, ... c) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0c", + .str_sec_size = sizeof("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#3", +}, + +{ + .descr = "func proto (arg after vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, ..., unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* typedef void (*func_ptr)(int, unsigned int) */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [3] */ + /* const func_ptr */ + BTF_CONST_ENC(3), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [6] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_ptr", + .str_sec_size = sizeof("\0func_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + BTF_CONST_ENC(4), /* [3] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_typedef", + .str_sec_size = sizeof("\0func_typedef"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + +{ + .descr = "func proto (btf_resolve(arg))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* void (*)(const void *) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(0, 3), + BTF_CONST_ENC(4), /* [3] */ + BTF_PTR_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Not all arg has name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0b", + .str_sec_size = sizeof("\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Bad arg name_off)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int <bad_name_off>) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_END_RAW, + }, + .str_sec = "\0a", + .str_sec_size = sizeof("\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Bad arg name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int !!!) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0!!!", + .str_sec_size = sizeof("\0a\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Invalid return type)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* <bad_ret_type> (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(100, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid return type", +}, + +{ + .descr = "func proto (with func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void func_proto(int, unsigned int) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_proto", + .str_sec_size = sizeof("\0func_proto"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func proto (const void arg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(const void) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 4), + BTF_CONST_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, + +{ + .descr = "func (void func(int a, unsigned int b))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func (No func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void <no_name>(int a, unsigned int b) */ + BTF_FUNC_ENC(0, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Invalid func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void !!!(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0!!!", + .str_sec_size = sizeof("\0a\0b\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Some arg has no name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + /* void func(int a, unsigned int) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0func", + .str_sec_size = sizeof("\0a\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func (Non zero vlen)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "vlen != 0", +}, + +{ + .descr = "func (Not referring to FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0func", + .str_sec_size = sizeof("\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + +{ + .descr = "invalid int kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 1, 0), 4), /* [2] */ + BTF_INT_ENC(0, 0, 32), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid ptr kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid array kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 1, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid enum kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid fwd kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid typedef kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_TYPEDEF, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid volatile kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid const kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid restrict kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 0), 0), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 1, 0), 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func_proto kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid struct, kind_flag, bitfield_size = 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 32)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 4)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 0)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid struct, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, bitfield base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 20, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid struct, kind_flag, base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 12, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 8)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid union, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 2), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, int member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + +{ + .descr = "invalid struct, kind_flag, enum member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -1381,11 +2714,11 @@ static const char *get_next_str(const char *start, const char *end) return start < end - 1 ? start + 1 : NULL; } -static int get_type_sec_size(const __u32 *raw_types) +static int get_raw_sec_size(const __u32 *raw_types) { int i; - for (i = MAX_NR_RAW_TYPES - 1; + for (i = MAX_NR_RAW_U32 - 1; i >= 0 && raw_types[i] != BTF_END_RAW; i--) ; @@ -1397,7 +2730,8 @@ static void *btf_raw_create(const struct btf_header *hdr, const __u32 *raw_types, const char *str, unsigned int str_sec_size, - unsigned int *btf_size) + unsigned int *btf_size, + const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; unsigned int size_needed, offset; @@ -1406,7 +2740,7 @@ static void *btf_raw_create(const struct btf_header *hdr, uint32_t *ret_types; void *raw_btf; - type_sec_size = get_type_sec_size(raw_types); + type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) return NULL; @@ -1445,6 +2779,8 @@ static void *btf_raw_create(const struct btf_header *hdr, ret_hdr->str_len = str_sec_size; *btf_size = size_needed; + if (ret_next_str) + *ret_next_str = next_str; return raw_btf; } @@ -1464,7 +2800,7 @@ static int do_test_raw(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1541,7 +2877,7 @@ static int test_raw(void) struct btf_get_info_test { const char *descr; const char *str_sec; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; int btf_size_delta; int (*special_test)(unsigned int test_num); @@ -1621,7 +2957,7 @@ static int test_big_btf_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1705,7 +3041,7 @@ static int test_btf_id(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1843,7 +3179,7 @@ static int do_test_get_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1940,13 +3276,13 @@ static struct btf_file_test file_tests[] = { }, }; -static int file_has_btf_elf(const char *fn) +static int file_has_btf_elf(const char *fn, bool *has_btf_ext) { Elf_Scn *scn = NULL; GElf_Ehdr ehdr; + int ret = 0; int elf_fd; Elf *elf; - int ret; if (CHECK(elf_version(EV_CURRENT) == EV_NONE, "elf_version(EV_CURRENT) == EV_NONE")) @@ -1978,14 +3314,12 @@ static int file_has_btf_elf(const char *fn) } sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!strcmp(sh_name, BTF_ELF_SEC)) { + if (!strcmp(sh_name, BTF_ELF_SEC)) ret = 1; - goto done; - } + if (!strcmp(sh_name, BTF_EXT_ELF_SEC)) + *has_btf_ext = true; } - ret = 0; - done: close(elf_fd); elf_end(elf); @@ -1995,15 +3329,24 @@ done: static int do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; + const char *expected_fnames[] = {"_dummy_tracepoint", + "test_long_fname_1", + "test_long_fname_2"}; + struct bpf_prog_info info = {}; struct bpf_object *obj = NULL; + struct bpf_func_info *finfo; struct bpf_program *prog; + __u32 info_len, rec_size; + bool has_btf_ext = false; + struct btf *btf = NULL; + void *func_info = NULL; struct bpf_map *map; - int err; + int i, err, prog_fd; fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, test->file); - err = file_has_btf_elf(test->file); + err = file_has_btf_elf(test->file, &has_btf_ext); if (err == -1) return err; @@ -2031,6 +3374,7 @@ static int do_test_file(unsigned int test_num) err = bpf_object__load(obj); if (CHECK(err < 0, "bpf_object__load: %d", err)) goto done; + prog_fd = bpf_program__fd(prog); map = bpf_object__find_map_by_name(obj, "btf_map"); if (CHECK(!map, "btf_map not found")) { @@ -2045,9 +3389,100 @@ static int do_test_file(unsigned int test_num) test->btf_kv_notfound)) goto done; + if (!has_btf_ext) + goto skip; + + /* get necessary program info */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + rec_size = info.func_info_rec_size; + if (CHECK(rec_size != sizeof(struct bpf_func_info), + "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { + err = -1; + goto done; + } + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) { + err = -1; + goto done; + } + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = 3; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + err = btf__get_from_id(info.btf_id, &btf); + if (CHECK(err, "cannot get btf from kernel, err: %d", err)) + goto done; + + /* check three functions */ + finfo = func_info; + for (i = 0; i < 3; i++) { + const struct btf_type *t; + const char *fname; + + t = btf__type_by_id(btf, finfo->type_id); + if (CHECK(!t, "btf__type_by_id failure: id %u", + finfo->type_id)) { + err = -1; + goto done; + } + + fname = btf__name_by_offset(btf, t->name_off); + err = strcmp(fname, expected_fnames[i]); + /* for the second and third functions in .text section, + * the compiler may order them either way. + */ + if (i && err) + err = strcmp(fname, expected_fnames[3 - i]); + if (CHECK(err, "incorrect fname %s", fname ? : "")) { + err = -1; + goto done; + } + + finfo = (void *)finfo + rec_size; + } + +skip: fprintf(stderr, "OK"); done: + free(func_info); bpf_object__close(obj); return err; } @@ -2093,7 +3528,8 @@ struct pprint_mapv { } aenum; }; -static struct btf_raw_test pprint_test_template = { +static struct btf_raw_test pprint_test_template[] = { +{ .raw_types = { /* unsighed char */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), @@ -2143,13 +3579,140 @@ static struct btf_raw_test pprint_test_template = { BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ BTF_END_RAW, }, - .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum", - .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}, + +{ + /* this type will have the same type as the + * first .raw_types definition, but struct type will + * be encoded with kind_flag set. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 8), 32), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 126)), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}, + +{ + /* this type will have the same layout as the + * first .raw_types definition. The struct type will + * be encoded with kind_flag set, bitfield members + * are added typedef/const/volatile, and bitfield members + * will have both int and enum types. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 8), 32), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 19, BTF_MEMBER_OFFSET(2, 126)),/* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + /* typedef unsigned int ___int */ /* [17] */ + BTF_TYPEDEF_ENC(NAME_TBD, 18), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0___int"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ .max_entries = 128 * 1024, +}, + }; static struct btf_pprint_test_meta { @@ -2252,9 +3815,9 @@ static int check_line(const char *expected_line, int nexpected_line, } -static int do_test_pprint(void) +static int do_test_pprint(int test_num) { - const struct btf_raw_test *test = &pprint_test_template; + const struct btf_raw_test *test = &pprint_test_template[test_num]; struct bpf_create_map_attr create_attr = {}; bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; @@ -2270,10 +3833,10 @@ static int do_test_pprint(void) uint8_t *raw_btf; ssize_t nread; - fprintf(stderr, "%s......", test->descr); + fprintf(stderr, "%s(#%d)......", test->descr, test_num); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2463,30 +4026,940 @@ static int test_pprint(void) unsigned int i; int err = 0; + /* test various maps with the first test template */ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { - pprint_test_template.descr = pprint_tests_meta[i].descr; - pprint_test_template.map_type = pprint_tests_meta[i].map_type; - pprint_test_template.map_name = pprint_tests_meta[i].map_name; - pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map; - pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map; - pprint_test_template.percpu_map = pprint_tests_meta[i].percpu_map; + pprint_test_template[0].descr = pprint_tests_meta[i].descr; + pprint_test_template[0].map_type = pprint_tests_meta[i].map_type; + pprint_test_template[0].map_name = pprint_tests_meta[i].map_name; + pprint_test_template[0].ordered_map = pprint_tests_meta[i].ordered_map; + pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; + pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; + + err |= count_result(do_test_pprint(0)); + } + + /* test rest test templates with the first map */ + for (i = 1; i < ARRAY_SIZE(pprint_test_template); i++) { + pprint_test_template[i].descr = pprint_tests_meta[0].descr; + pprint_test_template[i].map_type = pprint_tests_meta[0].map_type; + pprint_test_template[i].map_name = pprint_tests_meta[0].map_name; + pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; + pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; + pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; + err |= count_result(do_test_pprint(i)); + } + + return err; +} + +#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ + (insn_off), (file_off), (line_off), ((line_num) << 10 | ((line_col) & 0x3ff)) - err |= count_result(do_test_pprint()); +static struct prog_info_raw_test { + const char *descr; + const char *str_sec; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_U32]; + __u32 str_sec_size; + struct bpf_insn insns[MAX_INSNS]; + __u32 prog_type; + __u32 func_info[MAX_SUBPROGS][2]; + __u32 func_info_rec_size; + __u32 func_info_cnt; + __u32 line_info[MAX_NR_RAW_U32]; + __u32 line_info_rec_size; + __u32 nr_jited_ksyms; + bool expected_prog_load_failure; +} info_raw_tests[] = { +{ + .descr = "func_type (main func + one sub)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, +}, + +{ + .descr = "func_type (Incorrect func_info_rec_size)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 4, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect func_info_cnt)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 1, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect bpf_func_info.insn_off)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {2, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. insn_off >= prog->len)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, 0, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "line_info[4].insn_off", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (Zero bpf insn code)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0unsigned long\0u64\0u64 a=1;\0return a;"), + .insns = { + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, 0, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "Invalid insn code at line_info[1]", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog. zero tailing line_info", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 0, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. nonzero tailing line_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 1, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, + .err_str = "nonzero tailing record in line_info", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog. missing 1st func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#0", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. missing 2nd func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#1", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. unordered insn offset)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "Invalid line_info[2].insn_off", + .expected_prog_load_failure = true, +}, + +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static __u32 *patch_name_tbd(const __u32 *raw_u32, + const char *str, __u32 str_off, + unsigned int str_sec_size, + unsigned int *ret_size) +{ + int i, raw_u32_size = get_raw_sec_size(raw_u32); + const char *end_str = str + str_sec_size; + const char *next_str = str + str_off; + __u32 *new_u32 = NULL; + + if (raw_u32_size == -1) + return ERR_PTR(-EINVAL); + + if (!raw_u32_size) { + *ret_size = 0; + return NULL; + } + + new_u32 = malloc(raw_u32_size); + if (!new_u32) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < raw_u32_size / sizeof(raw_u32[0]); i++) { + if (raw_u32[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (CHECK(!next_str, "Error in getting next_str\n")) { + free(new_u32); + return ERR_PTR(-EINVAL); + } + new_u32[i] = next_str - str; + next_str += strlen(next_str); + } else { + new_u32[i] = raw_u32[i]; + } + } + + *ret_size = raw_u32_size; + return new_u32; +} + +static int test_get_finfo(const struct prog_info_raw_test *test, + int prog_fd) +{ + struct bpf_prog_info info = {}; + struct bpf_func_info *finfo; + __u32 info_len, rec_size, i; + void *func_info = NULL; + int err; + + /* get necessary lens */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + return -1; + } + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + return -1; + } + + rec_size = info.func_info_rec_size; + if (CHECK(rec_size != sizeof(struct bpf_func_info), + "incorrect info.func_info_rec_size (1st) %d", rec_size)) { + return -1; + } + + if (!info.nr_func_info) + return 0; + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) + return -1; + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = test->func_info_cnt; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + finfo = func_info; + for (i = 0; i < test->func_info_cnt; i++) { + if (CHECK(finfo->type_id != test->func_info[i][1], + "incorrect func_type %u expected %u", + finfo->type_id, test->func_info[i][1])) { + err = -1; + goto done; + } + finfo = (void *)finfo + rec_size; + } + + err = 0; + +done: + free(func_info); + return err; +} + +static int test_get_linfo(const struct prog_info_raw_test *test, + const void *patched_linfo, + __u32 cnt, int prog_fd) +{ + __u32 i, info_len, nr_jited_ksyms, nr_jited_func_lens; + __u64 *jited_linfo = NULL, *jited_ksyms = NULL; + __u32 rec_size, jited_rec_size, jited_cnt; + struct bpf_line_info *linfo = NULL; + __u32 cur_func_len, ksyms_found; + struct bpf_prog_info info = {}; + __u32 *jited_func_lens = NULL; + __u64 cur_func_ksyms; + int err; + + jited_cnt = cnt; + rec_size = sizeof(*linfo); + jited_rec_size = sizeof(*jited_linfo); + if (test->nr_jited_ksyms) + nr_jited_ksyms = test->nr_jited_ksyms; + else + nr_jited_ksyms = test->func_info_cnt; + nr_jited_func_lens = nr_jited_ksyms; + + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + err = -1; + goto done; + } + + if (!info.jited_prog_len) { + /* prog is not jited */ + jited_cnt = 0; + nr_jited_ksyms = 1; + nr_jited_func_lens = 1; + } + + if (CHECK(info.nr_line_info != cnt || + info.nr_jited_line_info != jited_cnt || + info.nr_jited_ksyms != nr_jited_ksyms || + info.nr_jited_func_lens != nr_jited_func_lens || + (!info.nr_line_info && info.nr_jited_line_info), + "info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.nr_jited_ksyms, nr_jited_ksyms, + info.nr_jited_func_lens, nr_jited_func_lens)) { + err = -1; + goto done; + } + + if (CHECK(info.line_info_rec_size != sizeof(struct bpf_line_info) || + info.jited_line_info_rec_size != sizeof(__u64), + "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size)) { + err = -1; + goto done; + } + + if (!cnt) + return 0; + + rec_size = info.line_info_rec_size; + jited_rec_size = info.jited_line_info_rec_size; + + memset(&info, 0, sizeof(info)); + + linfo = calloc(cnt, rec_size); + if (CHECK(!linfo, "!linfo")) { + err = -1; + goto done; + } + info.nr_line_info = cnt; + info.line_info_rec_size = rec_size; + info.line_info = ptr_to_u64(linfo); + + if (jited_cnt) { + jited_linfo = calloc(jited_cnt, jited_rec_size); + jited_ksyms = calloc(nr_jited_ksyms, sizeof(*jited_ksyms)); + jited_func_lens = calloc(nr_jited_func_lens, + sizeof(*jited_func_lens)); + if (CHECK(!jited_linfo || !jited_ksyms || !jited_func_lens, + "jited_linfo:%p jited_ksyms:%p jited_func_lens:%p", + jited_linfo, jited_ksyms, jited_func_lens)) { + err = -1; + goto done; + } + + info.nr_jited_line_info = jited_cnt; + info.jited_line_info_rec_size = jited_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); + info.nr_jited_ksyms = nr_jited_ksyms; + info.jited_ksyms = ptr_to_u64(jited_ksyms); + info.nr_jited_func_lens = nr_jited_func_lens; + info.jited_func_lens = ptr_to_u64(jited_func_lens); + } + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + /* + * Only recheck the info.*line_info* fields. + * Other fields are not the concern of this test. + */ + if (CHECK(err == -1 || + info.nr_line_info != cnt || + (jited_cnt && !info.jited_line_info) || + info.nr_jited_line_info != jited_cnt || + info.line_info_rec_size != rec_size || + info.jited_line_info_rec_size != jited_rec_size, + "err:%d errno:%d info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", + err, errno, + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size, + (void *)(long)info.line_info, + (void *)(long)info.jited_line_info)) { + err = -1; + goto done; + } + + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", + linfo[0].insn_off); + for (i = 1; i < cnt; i++) { + const struct bpf_line_info *expected_linfo; + + expected_linfo = patched_linfo + (i * test->line_info_rec_size); + if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, + "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", + i, linfo[i].insn_off, + i - 1, linfo[i - 1].insn_off)) { + err = -1; + goto done; + } + if (CHECK(linfo[i].file_name_off != expected_linfo->file_name_off || + linfo[i].line_off != expected_linfo->line_off || + linfo[i].line_col != expected_linfo->line_col, + "linfo[%u] (%u, %u, %u) != (%u, %u, %u)", i, + linfo[i].file_name_off, + linfo[i].line_off, + linfo[i].line_col, + expected_linfo->file_name_off, + expected_linfo->line_off, + expected_linfo->line_col)) { + err = -1; + goto done; + } + } + + if (!jited_cnt) { + fprintf(stderr, "not jited. skipping jited_line_info check. "); + err = 0; + goto done; + } + + if (CHECK(jited_linfo[0] != jited_ksyms[0], + "jited_linfo[0]:%lx != jited_ksyms[0]:%lx", + (long)(jited_linfo[0]), (long)(jited_ksyms[0]))) { + err = -1; + goto done; + } + + ksyms_found = 1; + cur_func_len = jited_func_lens[0]; + cur_func_ksyms = jited_ksyms[0]; + for (i = 1; i < jited_cnt; i++) { + if (ksyms_found < nr_jited_ksyms && + jited_linfo[i] == jited_ksyms[ksyms_found]) { + cur_func_ksyms = jited_ksyms[ksyms_found]; + cur_func_len = jited_ksyms[ksyms_found]; + ksyms_found++; + continue; + } + + if (CHECK(jited_linfo[i] <= jited_linfo[i - 1], + "jited_linfo[%u]:%lx <= jited_linfo[%u]:%lx", + i, (long)jited_linfo[i], + i - 1, (long)(jited_linfo[i - 1]))) { + err = -1; + goto done; + } + + if (CHECK(jited_linfo[i] - cur_func_ksyms > cur_func_len, + "jited_linfo[%u]:%lx - %lx > %u", + i, (long)jited_linfo[i], (long)cur_func_ksyms, + cur_func_len)) { + err = -1; + goto done; + } + } + + if (CHECK(ksyms_found != nr_jited_ksyms, + "ksyms_found:%u != nr_jited_ksyms:%u", + ksyms_found, nr_jited_ksyms)) { + err = -1; + goto done; + } + + err = 0; + +done: + free(linfo); + free(jited_linfo); + free(jited_ksyms); + free(jited_func_lens); + return err; +} + +static int do_test_info_raw(unsigned int test_num) +{ + const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; + unsigned int raw_btf_size, linfo_str_off, linfo_size; + int btf_fd = -1, prog_fd = -1, err = 0; + void *raw_btf, *patched_linfo = NULL; + const char *ret_next_str; + union bpf_attr attr = {}; + + fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size, &ret_next_str); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + err = -1; + goto done; + } + + if (*btf_log_buf && args.always_log) + fprintf(stderr, "\n%s", btf_log_buf); + *btf_log_buf = '\0'; + + linfo_str_off = ret_next_str - test->str_sec; + patched_linfo = patch_name_tbd(test->line_info, + test->str_sec, linfo_str_off, + test->str_sec_size, &linfo_size); + if (IS_ERR(patched_linfo)) { + fprintf(stderr, "error in creating raw bpf_line_info"); + err = -1; + goto done; + } + + attr.prog_type = test->prog_type; + attr.insns = ptr_to_u64(test->insns); + attr.insn_cnt = probe_prog_length(test->insns); + attr.license = ptr_to_u64("GPL"); + attr.prog_btf_fd = btf_fd; + attr.func_info_rec_size = test->func_info_rec_size; + attr.func_info_cnt = test->func_info_cnt; + attr.func_info = ptr_to_u64(test->func_info); + attr.log_buf = ptr_to_u64(btf_log_buf); + attr.log_size = BTF_LOG_BUF_SIZE; + attr.log_level = 1; + if (linfo_size) { + attr.line_info_rec_size = test->line_info_rec_size; + attr.line_info = ptr_to_u64(patched_linfo); + attr.line_info_cnt = linfo_size / attr.line_info_rec_size; + } + + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = ((prog_fd == -1) != test->expected_prog_load_failure); + if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", + prog_fd, test->expected_prog_load_failure, errno) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; } + if (prog_fd == -1) + goto done; + + err = test_get_finfo(test, prog_fd); + if (err) + goto done; + + err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd); + if (err) + goto done; + +done: + if (!err) + fprintf(stderr, "OK"); + + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (prog_fd != -1) + close(prog_fd); + + if (!IS_ERR(patched_linfo)) + free(patched_linfo); + + return err; +} + +static int test_info_raw(void) +{ + unsigned int i; + int err = 0; + + if (args.info_raw_test_num) + return count_result(do_test_info_raw(args.info_raw_test_num)); + + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + err |= count_result(do_test_info_raw(i)); + return err; } static void usage(const char *cmd) { - fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n", + fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" + "\t[-g btf_get_info_test_num (1 - %zu)] |\n" + "\t[-f btf_file_test_num (1 - %zu)] |\n" + "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" + "\t[-p (pretty print test)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpf:r:g:"; + const char *optstr = "lpk:f:r:g:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -2509,6 +4982,10 @@ static int parse_args(int argc, char **argv) case 'p': args.pprint_test = true; break; + case 'k': + args.info_raw_test_num = atoi(optarg); + args.info_raw_test = true; + break; case 'h': usage(argv[0]); exit(0); @@ -2542,6 +5019,14 @@ static int parse_args(int argc, char **argv) return -1; } + if (args.info_raw_test_num && + (args.info_raw_test_num < 1 || + args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { + fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n", + ARRAY_SIZE(info_raw_tests)); + return -1; + } + return 0; } @@ -2574,13 +5059,17 @@ int main(int argc, char **argv) if (args.pprint_test) err |= test_pprint(); + if (args.info_raw_test) + err |= test_info_raw(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test) + args.pprint_test || args.info_raw_test) goto done; err |= test_raw(); err |= test_get_info(); err |= test_file(); + err |= test_info_raw(); done: print_summary(); diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c index b21b876f475d..e5c79fe0ffdb 100644 --- a/tools/testing/selftests/bpf/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -24,8 +24,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c index 0ed8e088eebf..434188c37774 100644 --- a/tools/testing/selftests/bpf/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/test_btf_nokv.c @@ -22,8 +22,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -40,4 +40,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index c0fb073b5eab..d23d4da66b83 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -59,7 +59,7 @@ else fi # Attach BPF program -./flow_dissector_load -p bpf_flow.o -s dissect +./flow_dissector_load -p bpf_flow.o -s flow_dissector # Setup tc qdisc add dev lo ingress diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index 156d89f1edcc..2989b2e2d856 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -33,17 +33,11 @@ trap exit_handler 0 2 3 6 9 libbpf_open_file test_l4lb.o -# TODO: fix libbpf to load noinline functions -# [warning] libbpf: incorrect bpf_call opcode -#libbpf_open_file test_l4lb_noinline.o +# Load a program with BPF-to-BPF calls +libbpf_open_file test_l4lb_noinline.o -# TODO: fix test_xdp_meta.c to load with libbpf -# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version -#libbpf_open_file test_xdp_meta.o - -# TODO: fix libbpf to handle .eh_frame -# [warning] libbpf: relocation failed: no section(10) -#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o +# Load a program compiled without the "-target bpf" flag +libbpf_open_file test_xdp.o # Success exit 0 diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index 677686198df3..ec4e15948e40 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -21,13 +21,14 @@ do if grep -q DRV_NAME=rc-loopback $i/uevent then LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q) + INPUTDEV=$(grep DEVNAME= $i/input*/event*/uevent | sed sQDEVNAME=Q/dev/Q) fi done if [ -n $LIRCDEV ]; then TYPE=lirc_mode2 - ./test_lirc_mode2_user $LIRCDEV + ./test_lirc_mode2_user $LIRCDEV $INPUTDEV ret=$? if [ $ret -ne 0 ]; then echo -e ${RED}"FAIL: $TYPE"${NC} diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c index ba26855563a5..4147130cc3b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c @@ -15,6 +15,9 @@ int bpf_decoder(unsigned int *sample) if (duration & 0x10000) bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); + if (duration & 0x20000) + bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff, + duration & 0xff); } return 0; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index d470d63c33db..fb5fd6841ef3 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -29,6 +29,7 @@ #include <linux/bpf.h> #include <linux/lirc.h> +#include <linux/input.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -47,12 +48,13 @@ int main(int argc, char **argv) { struct bpf_object *obj; - int ret, lircfd, progfd, mode; - int testir = 0x1dead; + int ret, lircfd, progfd, inputfd; + int testir1 = 0x1dead; + int testir2 = 0x20101; u32 prog_ids[10], prog_flags[10], prog_cnt; - if (argc != 2) { - printf("Usage: %s /dev/lircN\n", argv[0]); + if (argc != 3) { + printf("Usage: %s /dev/lircN /dev/input/eventM\n", argv[0]); return 2; } @@ -76,9 +78,9 @@ int main(int argc, char **argv) return 1; } - mode = LIRC_MODE_SCANCODE; - if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) { - printf("failed to set rec mode: %m\n"); + inputfd = open(argv[2], O_RDONLY | O_NONBLOCK); + if (inputfd == -1) { + printf("failed to open input device %s: %m\n", argv[1]); return 1; } @@ -102,29 +104,54 @@ int main(int argc, char **argv) } /* Write raw IR */ - ret = write(lircfd, &testir, sizeof(testir)); - if (ret != sizeof(testir)) { + ret = write(lircfd, &testir1, sizeof(testir1)); + if (ret != sizeof(testir1)) { printf("Failed to send test IR message: %m\n"); return 1; } - struct pollfd pfd = { .fd = lircfd, .events = POLLIN }; - struct lirc_scancode lsc; + struct pollfd pfd = { .fd = inputfd, .events = POLLIN }; + struct input_event event; - poll(&pfd, 1, 100); + for (;;) { + poll(&pfd, 1, 100); - /* Read decoded IR */ - ret = read(lircfd, &lsc, sizeof(lsc)); - if (ret != sizeof(lsc)) { - printf("Failed to read decoded IR: %m\n"); - return 1; + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_MSC && event.code == MSC_SCAN && + event.value == 0xdead) { + break; + } } - if (lsc.scancode != 0xdead || lsc.rc_proto != 64) { - printf("Incorrect scancode decoded\n"); + /* Write raw IR */ + ret = write(lircfd, &testir2, sizeof(testir2)); + if (ret != sizeof(testir2)) { + printf("Failed to send test IR message: %m\n"); return 1; } + for (;;) { + poll(&pfd, 1, 100); + + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_REL && event.code == REL_Y && + event.value == 1 ) { + break; + } + } + prog_cnt = 10; ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids, &prog_cnt); diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/test_map_in_map.c new file mode 100644 index 000000000000..ce923e67e08e --- /dev/null +++ b/tools/testing/selftests/bpf/test_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") mim_array = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +struct bpf_map_def SEC("maps") mim_hash = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +SEC("xdp_mimtest") +int xdp_mimtest0(struct xdp_md *ctx) +{ + int value = 123; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&mim_array, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + map = bpf_map_lookup_elem(&mim_hash, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + return XDP_PASS; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 4db2116e52be..9c79ee017df3 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -258,24 +258,36 @@ static void test_hashmap_percpu(int task, void *data) close(fd); } -static void test_hashmap_walk(int task, void *data) +static int helper_fill_hashmap(int max_entries) { - int fd, i, max_entries = 1000; - long long key, value, next_key; - bool next_key_valid = true; + int i, fd, ret; + long long key, value; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), max_entries, map_flags); - if (fd < 0) { - printf("Failed to create hashmap '%s'!\n", strerror(errno)); - exit(1); - } + CHECK(fd < 0, + "failed to create hashmap", + "err: %s, flags: 0x%x\n", strerror(errno), map_flags); for (i = 0; i < max_entries; i++) { key = i; value = key; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0); + ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(ret != 0, + "can't update hashmap", + "err: %s\n", strerror(ret)); } + return fd; +} + +static void test_hashmap_walk(int task, void *data) +{ + int fd, i, max_entries = 1000; + long long key, value, next_key; + bool next_key_valid = true; + + fd = helper_fill_hashmap(max_entries); + for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key, &next_key) == 0; i++) { key = next_key; @@ -306,6 +318,39 @@ static void test_hashmap_walk(int task, void *data) close(fd); } +static void test_hashmap_zero_seed(void) +{ + int i, first, second, old_flags; + long long key, next_first, next_second; + + old_flags = map_flags; + map_flags |= BPF_F_ZERO_SEED; + + first = helper_fill_hashmap(3); + second = helper_fill_hashmap(3); + + for (i = 0; ; i++) { + void *key_ptr = !i ? NULL : &key; + + if (bpf_map_get_next_key(first, key_ptr, &next_first) != 0) + break; + + CHECK(bpf_map_get_next_key(second, key_ptr, &next_second) != 0, + "next_key for second map must succeed", + "key_ptr: %p", key_ptr); + CHECK(next_first != next_second, + "keys must match", + "i: %d first: %lld second: %lld\n", i, + next_first, next_second); + + key = next_first; + } + + map_flags = old_flags; + close(first); + close(second); +} + static void test_arraymap(int task, void *data) { int key, next_key, fd; @@ -1080,6 +1125,94 @@ out_sockmap: exit(1); } +#define MAPINMAP_PROG "./test_map_in_map.o" +static void test_map_in_map(void) +{ + struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; + int mim_fd, fd, err; + int pos = 0; + + obj = bpf_object__open(MAPINMAP_PROG); + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), + 2, 0); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for hash of maps\n"); + goto out_map_in_map; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_xdp(prog); + } + bpf_object__load(obj); + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for array of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for hash of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update hash of maps\n"); + goto out_map_in_map; + } + + close(fd); + bpf_object__close(obj); + return; + +out_map_in_map: + close(fd); + exit(1); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) @@ -1534,6 +1667,7 @@ static void run_all_tests(void) test_hashmap(0, NULL); test_hashmap_percpu(0, NULL); test_hashmap_walk(0, NULL); + test_hashmap_zero_seed(); test_arraymap(0, NULL); test_arraymap_percpu(0, NULL); @@ -1554,6 +1688,8 @@ static void run_all_tests(void) test_queuemap(0, NULL); test_stackmap(0, NULL); + + test_map_in_map(); } int main(void) diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c index 7887df693399..44ed7f29f8ab 100644 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -81,7 +81,10 @@ int main(int argc, char **argv) goto err; } - assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0); + if (system("which ping6 &>/dev/null") == 0) + assert(!system("ping6 localhost -c 10000 -f -q > /dev/null")); + else + assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null")); if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, &prog_cnt)) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 2d3c04f45530..126fc624290d 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -51,10 +51,10 @@ static struct { struct iphdr iph; struct tcphdr tcp; } __packed pkt_v4 = { - .eth.h_proto = bpf_htons(ETH_P_IP), + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = 6, - .iph.tot_len = bpf_htons(MAGIC_BYTES), + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; @@ -64,13 +64,13 @@ static struct { struct ipv6hdr iph; struct tcphdr tcp; } __packed pkt_v6 = { - .eth.h_proto = bpf_htons(ETH_P_IPV6), + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = 6, - .iph.payload_len = bpf_htons(MAGIC_BYTES), + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; -#define CHECK(condition, tag, format...) ({ \ +#define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ error_cnt++; \ @@ -83,6 +83,11 @@ static struct { __ret; \ }) +#define CHECK(condition, tag, format...) \ + _CHECK(condition, tag, duration, format) +#define CHECK_ATTR(condition, tag, format...) \ + _CHECK(condition, tag, tattr.duration, format) + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -124,6 +129,53 @@ static void test_pkt_access(void) bpf_object__close(obj); } +static void test_prog_run_xattr(void) +{ + const char *file = "./test_pkt_access.o"; + struct bpf_object *obj; + char buf[10]; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = 5, + }; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + + memset(buf, 0, sizeof(buf)); + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", + "incorrect output size, want %lu have %u\n", + sizeof(pkt_v4), tattr.data_size_out); + + CHECK_ATTR(buf[5] != 0, "overflow", + "BPF_PROG_TEST_RUN ignored size hint\n"); + + tattr.data_out = NULL; + tattr.data_size_out = 0; + errno = 0; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err || errno || tattr.retval, "run_no_output", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + tattr.data_size_out = 1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); + + bpf_object__close(obj); +} + static void test_xdp(void) { struct vip key4 = {.protocol = 6, .family = AF_INET}; @@ -524,7 +576,7 @@ static void test_bpf_obj_id(void) load_time < now - 60 || load_time > now + 60 || prog_infos[i].created_by_uid != my_uid || prog_infos[i].nr_map_ids != 1 || - *(int *)prog_infos[i].map_ids != map_infos[i].id || + *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", @@ -539,7 +591,7 @@ static void test_bpf_obj_id(void) load_time, now, prog_infos[i].created_by_uid, my_uid, prog_infos[i].nr_map_ids, 1, - *(int *)prog_infos[i].map_ids, map_infos[i].id, + *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, prog_infos[i].name, expected_prog_name)) goto done; } @@ -585,7 +637,7 @@ static void test_bpf_obj_id(void) bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); - saved_map_id = *(int *)(prog_infos[i].map_ids); + saved_map_id = *(int *)((long)prog_infos[i].map_ids); prog_info.map_ids = prog_infos[i].map_ids; prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); @@ -593,12 +645,12 @@ static void test_bpf_obj_id(void) prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)prog_info.map_ids != saved_map_id, + *(int *)(long)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)prog_info.map_ids, saved_map_id); + *(int *)(long)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, @@ -1703,7 +1755,7 @@ static void test_reference_tracking() const char *file = "./test_sk_lookup_kern.o"; struct bpf_object *obj; struct bpf_program *prog; - __u32 duration; + __u32 duration = 0; int err = 0; obj = bpf_object__open(file); @@ -1837,6 +1889,7 @@ int main(void) jit_enabled = is_jit_enabled(); test_pkt_access(); + test_prog_run_xattr(); test_xdp(); test_xdp_adjust_tail(); test_l4lb_all(); diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c index b745bdc08c2b..e21cd736c196 100644 --- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return TC_ACT_SHOT; tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); - sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return sk ? TC_ACT_OK : TC_ACT_UNSPEC; @@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) bpf_sk_release(sk); return 0; @@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family = 0; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { bpf_sk_release(sk); family = sk->family; @@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); if (sk) { sk += 1; bpf_sk_release(sk); @@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) struct bpf_sock *sk; __u32 family; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); sk += 1; if (sk) bpf_sk_release(sk); @@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); return 0; } @@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); bpf_sk_release(sk); return 0; @@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) struct bpf_sock_tuple tuple = {}; struct bpf_sock *sk; - sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); bpf_sk_release(sk); return 0; } @@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb) void lookup_no_release(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; - bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0); + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } SEC("fail_no_release_subcall") diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index aeeb76a54d63..73b7493d4120 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -574,24 +574,44 @@ static int bind4_prog_load(const struct sock_addr_test *test) /* if (sk.family == AF_INET && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24), /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, type)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), BPF_JMP_A(1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20), /* 1st_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18), + + /* 2nd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16), + + /* 3rd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14), + + /* 4th_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12), /* 1st_half_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10), + + /* 2nd_half_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8), /* whole_user_ip4 == expected) { */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 622ade0a0957..e85a771f607b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -79,6 +79,8 @@ int txmsg_start; int txmsg_end; int txmsg_start_push; int txmsg_end_push; +int txmsg_start_pop; +int txmsg_pop; int txmsg_ingress; int txmsg_skb; int ktls; @@ -104,6 +106,8 @@ static const struct option long_options[] = { {"txmsg_end", required_argument, NULL, 'e'}, {"txmsg_start_push", required_argument, NULL, 'p'}, {"txmsg_end_push", required_argument, NULL, 'q'}, + {"txmsg_start_pop", required_argument, NULL, 'w'}, + {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, @@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } else { int slct, recvp = 0, recv, max_fd = fd; + float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; - float total_bytes; fd_set w; fcntl(fd, fd_flags); + /* Account for pop bytes noting each iteration of apply will + * call msg_pop_data helper so we need to account for this + * by calculating the number of apply iterations. Note user + * of the tool can create cases where no data is sent by + * manipulating pop/push/pull/etc. For example txmsg_apply 1 + * with txmsg_pop 1 will try to apply 1B at a time but each + * iteration will then pop 1B so no data will ever be sent. + * This is really only useful for testing edge cases in code + * paths. + */ total_bytes = (float)iov_count * (float)iov_length * (float)cnt; + txmsg_pop_total = txmsg_pop; + if (txmsg_apply) + txmsg_pop_total *= (total_bytes / txmsg_apply); + total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time: "); @@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, timeout.tv_sec = 0; timeout.tv_usec = 300000; } else { - timeout.tv_sec = 1; + timeout.tv_sec = 3; timeout.tv_usec = 0; } @@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } else if (!slct) { if (opt->verbose) - fprintf(stderr, "unexpected timeout\n"); + fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total); errno = -EIO; clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; @@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt) iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (err && opt->verbose) + if (opt->verbose) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); @@ -931,6 +949,39 @@ run: } } + if (txmsg_start_pop) { + i = 4; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n", + txmsg_start_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 4; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + } + + if (txmsg_pop) { + i = 5; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n", + txmsg_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 5; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -1082,6 +1133,11 @@ static void test_options(char *options) snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); strncat(options, tstr, OPTSTRING); } + if (txmsg_start_pop) { + snprintf(tstr, OPTSTRING, "pop (%d,%d),", + txmsg_start_pop, txmsg_start_pop + txmsg_pop); + strncat(options, tstr, OPTSTRING); + } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); if (txmsg_skb) @@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp) txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; txmsg_start_push = txmsg_end_push = 0; + txmsg_start_pop = txmsg_pop = 0; /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; @@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp) txmsg_end = 2; txmsg_start_push = 1; txmsg_end_push = 2; + txmsg_start_pop = 1; + txmsg_pop = 1; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Cut a byte of pushed data but leave reamining in place */ + txmsg_start = 1; + txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 3; + txmsg_start_pop = 1; + txmsg_pop = 1; err = test_txmsg(cgrp); if (err) goto out; @@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp) opt.iov_length = 100; txmsg_cork = 1600; + txmsg_start_pop = 0; + txmsg_pop = 0; + for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; @@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp) goto out; } + /* Test pop data in middle of cork */ + for (i = 99; i <= 1600; i += 500) { + txmsg_start_pop = 10; + txmsg_pop = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end with cork but pull data in middle */ for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; @@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop with cork pulling last sg entry */ + txmsg_start_pop = 1500; + txmsg_pop = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; @@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop of single byte in last page */ + txmsg_start_pop = 1111; + txmsg_pop = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; @@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp) txmsg_start_push = 1601; txmsg_end_push = 1600; err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop with start > data */ + txmsg_start_pop = 1601; + txmsg_pop = 1; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test pop with pop range > data */ + txmsg_start_pop = 1599; + txmsg_pop = 10; + err = test_exec(cgrp, &opt); out: txmsg_start = 0; txmsg_end = 0; @@ -1641,6 +1754,12 @@ int main(int argc, char **argv) case 'q': txmsg_end_push = atoi(optarg); break; + case 'w': + txmsg_start_pop = atoi(optarg); + break; + case 'x': + txmsg_pop = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 14b8bbac004f..e7639f66a941 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 4 + .max_entries = 6 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; + int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; - int *bytes, len1, len2 = 0, len3; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *bytes, len1, len2 = 0, len3, len4; int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length push_update %i->%i\n", len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg2: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg2: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); @@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push, *f; - int zero = 0, one = 1, two = 2, three = 3, key = 0; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg) if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; - int *bytes, *start, *end, *start_push, *end_push, *f; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int len1, len2 = 0, len3, len4; int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; @@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg) len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg4: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg4: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } + + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push; - int zero = 0, one = 1, two = 2, three = 3; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); - + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + bpf_printk("return sk drop\n"); return SK_DROP; } diff --git a/tools/testing/selftests/bpf/test_tcpnotify.h b/tools/testing/selftests/bpf/test_tcpnotify.h new file mode 100644 index 000000000000..8b6cea030bfc --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpnotify_globals { + __u32 total_retrans; + __u32 ncalls; +}; + +struct tcp_notifier { + __u8 type; + __u8 subtype; + __u8 source; + __u8 hash; +}; + +#define TESTPORT 12877 +#endif diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/test_tcpnotify_kern.c new file mode 100644 index 000000000000..edbca203ce2d --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_kern.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpnotify.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpnotify_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") perf_event_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + op = (int) skops->op; + + if (bpf_ntohl(skops->remote_port) != TESTPORT) { + skops->reply = -1; + return 0; + } + + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + case BPF_SOCK_OPS_RWND_INIT: + case BPF_SOCK_OPS_NEEDS_ECN: + case BPF_SOCK_OPS_BASE_RTT: + case BPF_SOCK_OPS_RTO_CB: + rv = 1; + break; + + case BPF_SOCK_OPS_TCP_CONNECT_CB: + case BPF_SOCK_OPS_TCP_LISTEN_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG| + BPF_SOCK_OPS_RTO_CB_FLAG)); + rv = 1; + break; + case BPF_SOCK_OPS_RETRANS_CB: { + __u32 key = 0; + struct tcpnotify_globals g, *gp; + struct tcp_notifier msg = { + .type = 0xde, + .subtype = 0xad, + .source = 0xbe, + .hash = 0xef, + }; + + rv = 1; + + /* Update results */ + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.ncalls++; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + bpf_perf_event_output(skops, &perf_event_map, + BPF_F_CURRENT_CPU, + &msg, sizeof(msg)); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c new file mode 100644 index 000000000000..ff3c4522aed6 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <asm/types.h> +#include <sys/syscall.h> +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <sys/socket.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <sys/ioctl.h> +#include <linux/rtnetlink.h> +#include <signal.h> +#include <linux/perf_event.h> + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#include "test_tcpnotify.h" +#include "trace_helpers.h" + +#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) + +pthread_t tid; +int rx_callbacks; + +static int dummyfn(void *data, int size) +{ + struct tcp_notifier *t = data; + + if (t->type != 0xde || t->subtype != 0xad || + t->source != 0xbe || t->hash != 0xef) + return 1; + rx_callbacks++; + return 0; +} + +void tcp_notifier_poller(int fd) +{ + while (1) + perf_event_poller(fd, dummyfn); +} + +static void *poller_thread(void *arg) +{ + int fd = *(int *)arg; + + tcp_notifier_poller(fd); + return arg; +} + +int verify_result(const struct tcpnotify_globals *result) +{ + return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1); +} + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +static int setup_bpf_perf_event(int mapfd) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + }; + int key = 0; + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); + if (pmu_fd < 0) + return pmu_fd; + bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY); + + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + return pmu_fd; +} + +int main(int argc, char **argv) +{ + const char *file = "test_tcpnotify_kern.o"; + int prog_fd, map_fd, perf_event_fd; + struct tcpnotify_globals g = {0}; + const char *cg_path = "/foo"; + int error = EXIT_FAILURE; + struct bpf_object *obj; + int cg_fd = -1; + __u32 key = 0; + int rv; + char test_script[80]; + int pmu_fd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + if (setup_cgroup_environment()) + goto err; + + cg_fd = create_and_get_cgroup(cg_path); + if (!cg_fd) + goto err; + + if (join_cgroup(cg_path)) + goto err; + + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); + if (perf_event_fd < 0) + goto err; + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + pmu_fd = setup_bpf_perf_event(perf_event_fd); + if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) + goto err; + + pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); + + sprintf(test_script, + "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + sleep(10); + + if (verify_result(&g)) { + printf("FAILED: Wrong stats Expected %d calls, got %d\n", + g.ncalls, rx_callbacks); + goto err; + } + + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + close(cg_fd); + cleanup_cgroup_environment(); + return error; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 6f61df62f690..baafe5c76aca 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -49,6 +49,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 #define MAX_NR_MAPS 13 +#define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -76,7 +77,7 @@ struct bpf_test { int fixup_percpu_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval, retval_unpriv; + uint32_t retval, retval_unpriv, insn_processed; enum { UNDEF, ACCEPT, @@ -86,6 +87,14 @@ struct bpf_test { uint8_t flags; __u8 data[TEST_DATA_LEN]; void (*fill_helper)(struct bpf_test *self); + uint8_t runs; + struct { + uint32_t retval, retval_unpriv; + union { + __u8 data[TEST_DATA_LEN]; + __u64 data64[TEST_DATA_LEN / 8]; + }; + } retvals[MAX_TEST_RUNS]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -721,8 +730,18 @@ static struct bpf_test tests[] = { BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode c4", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on imm 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0x1122334485667788), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -16069393, }, { "arsh32 on reg", @@ -732,8 +751,19 @@ static struct bpf_test tests[] = { BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode cc", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on reg 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0xffff55667788), + BPF_MOV64_IMM(BPF_REG_1, 15), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 43724, }, { "arsh64 on imm", @@ -980,15 +1010,45 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), /* mess up with R1 pointer on stack */ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), - /* fill back into R0 should fail */ + /* fill back into R0 is fine for priv. + * R0 now becomes SCALAR_VALUE. + */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + /* Load from R0 should fail. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), BPF_EXIT_INSN(), }, .errstr_unpriv = "attempt to corrupt spilled", - .errstr = "corrupted spill", + .errstr = "R0 invalid mem access 'inv", .result = REJECT, }, { + "check corrupted spill/fill, LSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { + "check corrupted spill/fill, MSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { "invalid src register in STX", .insns = { BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1), @@ -1792,10 +1852,20 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SK_SKB, }, { - "invalid 64B read of family in SK_MSG", + "valid access size in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, size)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid 64B read of size in SK_MSG", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, family)), + offsetof(struct sk_msg_md, size)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1806,10 +1876,10 @@ static struct bpf_test tests[] = { "invalid read past end of SK_MSG", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, local_port) + 4), + offsetof(struct sk_msg_md, size) + 4), BPF_EXIT_INSN(), }, - .errstr = "R0 !read_ok", + .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, }, @@ -1823,6 +1893,7 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet read for SK_MSG", @@ -2026,29 +2097,27 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash byte load not permitted 1", + "check skb->hash byte load permitted 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 2", + "check skb->hash byte load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 3", + "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2060,8 +2129,7 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { "check cb access: byte, wrong type", @@ -2173,7 +2241,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash half load not permitted", + "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2185,10 +2253,43 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, + .result = ACCEPT, + }, + { + "check skb->hash half load not permitted, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, .errstr = "invalid bpf_context access", .result = REJECT, }, { + "check skb->hash half load not permitted, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#endif + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { "check cb access: half, wrong type", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -2418,6 +2519,10 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, tc_index)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), BPF_EXIT_INSN(), }, .errstr_unpriv = "", @@ -2904,6 +3009,19 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "alu32: mov u32 const", + .insns = { + BPF_MOV32_IMM(BPF_REG_7, 0), + BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), + BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { "unpriv: partial copy of pointer", .insns = { BPF_MOV32_REG(BPF_REG_1, BPF_REG_10), @@ -3249,6 +3367,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R0 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs corruption 2", @@ -3279,6 +3398,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R3 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs + data", @@ -3778,6 +3898,7 @@ static struct bpf_test tests[] = { .errstr = "R2 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test16 (arith on data_end)", @@ -3880,6 +4001,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test21 (x += pkt_ptr, 2)", @@ -3905,6 +4027,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test22 (x += pkt_ptr, 3)", @@ -3935,6 +4058,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test23 (x += pkt_ptr, 4)", @@ -3961,6 +4085,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test24 (x += pkt_ptr, 5)", @@ -3986,6 +4111,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test25 (marking on <, good access)", @@ -5117,6 +5243,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid cgroup storage access 5", @@ -5233,6 +5360,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid per-cpu cgroup storage access 5", @@ -5270,6 +5398,31 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "multiple registers share map_lookup_elem result", .insns = { BPF_MOV64_IMM(BPF_REG_1, 10), @@ -7149,6 +7302,7 @@ static struct bpf_test tests[] = { .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value illegal alu op, 5", @@ -7171,6 +7325,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_48b = { 3 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value is preserved across register spilling", @@ -7664,6 +7819,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = 0 /* csum_diff of 64-byte packet */, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -8576,7 +8732,7 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map_hash_8b = { 4 }, - .errstr = "R0 invalid mem access 'inv'", + .errstr = "unbounded min value", .result = REJECT, }, { @@ -9626,6 +9782,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' > pkt_end, bad access 1", @@ -9663,6 +9820,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', good access", @@ -9701,6 +9859,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', bad access 2", @@ -9719,6 +9878,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, good access", @@ -9757,6 +9917,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, bad access 2", @@ -9775,6 +9936,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', good access", @@ -9792,6 +9954,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', bad access 1", @@ -9829,6 +9992,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, good access", @@ -9865,6 +10029,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, bad access 2", @@ -9902,6 +10067,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end >= pkt_data', bad access 1", @@ -9940,6 +10106,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, good access", @@ -9958,6 +10125,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, bad access 1", @@ -9996,6 +10164,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', good access", @@ -10032,6 +10201,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', bad access 2", @@ -10068,6 +10238,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' > pkt_data, bad access 1", @@ -10105,6 +10276,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', good access", @@ -10143,6 +10315,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', bad access 2", @@ -10161,6 +10334,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, good access", @@ -10199,6 +10373,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, bad access 2", @@ -10217,6 +10392,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', good access", @@ -10234,6 +10410,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', bad access 1", @@ -10271,6 +10448,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, good access", @@ -10307,6 +10485,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", @@ -10344,6 +10523,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data >= pkt_meta', bad access 1", @@ -10382,6 +10562,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, good access", @@ -10400,6 +10581,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", @@ -10438,6 +10620,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', good access", @@ -10474,6 +10657,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', bad access 2", @@ -10547,7 +10731,7 @@ static struct bpf_test tests[] = { "check deducing bounds from const, 5", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, @@ -10578,6 +10762,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 8", @@ -10591,6 +10776,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 9", @@ -11065,6 +11251,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R6 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls with args", @@ -11930,6 +12117,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", @@ -12073,6 +12261,7 @@ static struct bpf_test tests[] = { .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value_ptr_or_null via arg. test1", @@ -12244,6 +12433,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = POINTER_VALUE, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 2", @@ -12275,6 +12465,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 3", @@ -12310,6 +12501,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 4", @@ -12344,6 +12536,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 5", @@ -12377,6 +12570,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "same insn cannot be used with different", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 6", @@ -12412,6 +12606,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 7", @@ -12446,6 +12641,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 8", @@ -12486,6 +12682,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 9", @@ -12527,6 +12724,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: caller stack init to zero or map_value_or_null", @@ -12892,6 +13090,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "xadd/w check whether src/dst got mangled, 1", @@ -13378,6 +13577,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "Unreleased reference", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking: alloc, check, free in both subbranches", @@ -13406,6 +13606,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "reference tracking in call: free reference in subprog", @@ -13496,6 +13697,28 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "allocated_stack", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8), + BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9), + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = ACCEPT, + .insn_processed = 15, + }, + { "reference tracking in call: free reference in subprog and outside", .insns = { BPF_SK_LOOKUP, @@ -13896,6 +14119,274 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "calls: ctx read at start of subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "check wire_len is not readable by sockets", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check wire_len is readable by tc classifier", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "check wire_len is not writable by tc classifier", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, + }, + { + "calls: cross frame pruning", + .insns = { + /* r8 = !!random(); + * call pruner() + * if (r8) + * do something bad; + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result = REJECT, + }, + { + "jset: functional", + .insns = { + /* r0 = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* prep for direct packet access via r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + /* reg, bit 63 or bit 0 set, taken */ + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + + /* reg, bit 62, not taken */ + BPF_LD_IMM64(BPF_REG_8, 0x4000000000000000), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + + /* imm, any bit set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + + /* imm, bit 31 set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + /* all good - return r0 == 2 */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 7, + .retvals = { + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31), } + }, + { .retval = 2, + .data64 = { (1ULL << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (__u32)-1, } + }, + { .retval = 2, + .data64 = { ~0x4000000000000000ULL, } + }, + { .retval = 0, + .data64 = { 0, } + }, + { .retval = 0, + .data64 = { ~0ULL, } + }, + }, + }, + { + "jset: sign-extend", + .insns = { + /* r0 = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* prep for direct packet access via r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, + }, + { + "jset: known const compare", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .retval_unpriv = 1, + .result_unpriv = ACCEPT, + .retval = 1, + .result = ACCEPT, + }, + { + "jset: known const compare bad", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: unknown const compare taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: unknown const compare not taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: half-known const compare", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, + }, + { + "jset: range", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -13921,7 +14412,7 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(enum bpf_map_type prog_type) +static int create_prog_dummy1(enum bpf_prog_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), @@ -13932,7 +14423,7 @@ static int create_prog_dummy1(enum bpf_map_type prog_type) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) +static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -13947,7 +14438,7 @@ static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem, +static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, int p1key) { int p2key = 1; @@ -14018,7 +14509,7 @@ static int create_cgroup_storage(bool percpu) static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, +static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { int *fixup_map_hash_8b = test->fixup_map_hash_8b; @@ -14147,7 +14638,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type, do { prog[*fixup_map_stacktrace].imm = map_fds[12]; fixup_map_stacktrace++; - } while (fixup_map_stacktrace); + } while (*fixup_map_stacktrace); } } @@ -14178,16 +14669,43 @@ out: return ret; } +static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, + void *data, size_t size_data) +{ + __u8 tmp[TEST_DATA_LEN << 2]; + __u32 size_tmp = sizeof(tmp); + uint32_t retval; + int err; + + if (unpriv) + set_admin(true); + err = bpf_prog_test_run(fd_prog, 1, data, size_data, + tmp, &size_tmp, &retval, NULL); + if (unpriv) + set_admin(false); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error "); + return err; + } + if (!err && retval != expected_val && + expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d ", retval, expected_val); + return 1; + } + + return 0; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, reject_from_alignment; + int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; + int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; - uint32_t expected_val; - uint32_t retval; + __u32 pflags; int i, err; for (i = 0; i < MAX_NR_MAPS; i++) @@ -14198,69 +14716,105 @@ static void do_test_single(struct bpf_test *test, bool unpriv, do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type, prog, prog_len, - test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + pflags = 0; + if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) + pflags |= BPF_F_STRICT_ALIGNMENT; + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + pflags |= BPF_F_ANY_ALIGNMENT; + fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; - expected_val = unpriv && test->retval_unpriv ? - test->retval_unpriv : test->retval; - - reject_from_alignment = fd_prog < 0 && - (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (reject_from_alignment) { - printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", - strerror(errno)); - goto fail_log; - } -#endif + + alignment_prevented_execution = 0; + if (expected_ret == ACCEPT) { - if (fd_prog < 0 && !reject_from_alignment) { + if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (fd_prog >= 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) + alignment_prevented_execution = 1; +#endif } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { + if (!strstr(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; } } - if (fd_prog >= 0) { - __u8 tmp[TEST_DATA_LEN << 2]; - __u32 size_tmp = sizeof(tmp); - - if (unpriv) - set_admin(true); - err = bpf_prog_test_run(fd_prog, 1, test->data, - sizeof(test->data), tmp, &size_tmp, - &retval, NULL); - if (unpriv) - set_admin(false); - if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { - printf("Unexpected bpf_prog_test_run error\n"); + if (test->insn_processed) { + uint32_t insn_processed; + char *proc; + + proc = strstr(bpf_vlog, "processed "); + insn_processed = atoi(proc + 10); + if (test->insn_processed != insn_processed) { + printf("FAIL\nUnexpected insn_processed %u vs %u\n", + insn_processed, test->insn_processed); goto fail_log; } - if (!err && retval != expected_val && - expected_val != POINTER_VALUE) { - printf("FAIL retval %d != %d\n", retval, expected_val); - goto fail_log; + } + + run_errs = 0; + run_successes = 0; + if (!alignment_prevented_execution && fd_prog >= 0) { + uint32_t expected_val; + int i; + + if (!test->runs) { + expected_val = unpriv && test->retval_unpriv ? + test->retval_unpriv : test->retval; + + err = do_prog_test_run(fd_prog, unpriv, expected_val, + test->data, sizeof(test->data)); + if (err) + run_errs++; + else + run_successes++; } + + for (i = 0; i < test->runs; i++) { + if (unpriv && test->retvals[i].retval_unpriv) + expected_val = test->retvals[i].retval_unpriv; + else + expected_val = test->retvals[i].retval; + + err = do_prog_test_run(fd_prog, unpriv, expected_val, + test->retvals[i].data, + sizeof(test->retvals[i].data)); + if (err) { + printf("(run %d/%d) ", i + 1, test->runs); + run_errs++; + } else { + run_successes++; + } + } + } + + if (!run_errs) { + (*passes)++; + if (run_successes > 1) + printf("%d cases ", run_successes); + printf("OK"); + if (alignment_prevented_execution) + printf(" (NOTE: not executed due to unknown alignment)"); + printf("\n"); + } else { + printf("\n"); + goto fail_log; } - (*passes)++; - printf("OK%s\n", reject_from_alignment ? - " (NOTE: reject due to unknown alignment)" : ""); close_fds: close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh new file mode 100755 index 000000000000..d72d8488a3b2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test operations that we expect to report extended ack. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + netdev_pre_up_test + vxlan_vlan_add_test + port_vlan_add_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +netdev_pre_up_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx2 up type vxlan id 2000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx2 master br2 + check_err $? + + ip link set dev $swp2 master br2 + check_err $? + + # Unsupported configuration: mlxsw demands that all offloaded VXLAN + # devices have the same TTL. + ip link set dev vx2 down + ip link set dev vx2 type vxlan ttl 200 + + ip link set dev vx2 up &>/dev/null + check_fail $? + + ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - NETDEV_PRE_UP" + + ip link del dev vx2 + ip link del dev br2 + + ip link del dev vx1 + ip link del dev br1 +} + +vxlan_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at VXLAN device" + + ip link del dev vx1 + ip link del dev br1 +} + +port_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan del dev $swp1 vid 1 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at port" + + ip link del dev vx1 + ip link del dev br1 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh new file mode 100755 index 000000000000..f02d83e94576 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2 +# should be forwarded by the ASIC, but also trapped so that ICMP redirect +# packets could be potentially generated. +# +# 1. https://en.wikipedia.org/wiki/One-armed_router +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | 192.0.2.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:2::2/64 | | +# | | | | +# | | + $swp2 | | +# | +--|--------------------------------------------------------------------+ | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +switch_create() +{ + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + __addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64 +} + +switch_destroy() +{ + __addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +fwd_mark_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are trapped at + # swp1 due to loopback error, but only forwarded by the ASIC through + # swp2 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \ + skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv4 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in hardware" + + tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower +} + +fwd_mark_ipv6() +{ + tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \ + skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv6 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in hardware" + + tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + sysctl_set net.ipv4.conf.all.accept_redirects 0 + sysctl_set net.ipv6.conf.all.accept_redirects 0 + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + sysctl_restore net.ipv6.conf.all.accept_redirects + sysctl_restore net.ipv4.conf.all.accept_redirects + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh new file mode 100755 index 000000000000..94fdbf215c14 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -0,0 +1,565 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various interface configuration scenarios. Observe that configurations +# deemed valid by mlxsw succeed, invalid configurations fail and that no traces +# are produced. To prevent the test from passing in case traces are produced, +# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops' +# sysctls in its environment. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_set_addr_test + rif_inherit_bridge_addr_test + rif_non_inherit_bridge_addr_test + vlan_interface_deletion_test + bridge_deletion_test + bridge_vlan_flags_test + vlan_1_test + lag_bridge_upper_test + duplicate_vlans_test + vlan_rif_refcount_test + subport_rif_refcount_test + vlan_dev_deletion_test + lag_unlink_slaves_test + lag_dev_deletion_test + vlan_interface_uppers_test + devlink_reload_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +rif_set_addr_test() +{ + local swp1_mac=$(mac_get $swp1) + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # $swp1 and $swp2 likely got their IPv6 local addresses already, but + # here we need to test the transition to RIF. + ip addr flush dev $swp1 + ip addr flush dev $swp2 + sleep .1 + + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + ip link set dev $swp1 addr 00:11:22:33:44:55 + check_err $? + + # IP address enablement should be rejected if the MAC address prefix + # doesn't match other RIFs. + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_fail $? "IP address addition passed for a device with a wrong MAC" + ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for IP address addition" + + ip link set dev $swp2 addr 00:11:22:33:44:66 + check_err $? + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_err $? + + # Change of MAC address of a RIF should be forbidden if the new MAC + # doesn't share the prefix with other MAC addresses. + ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null + check_fail $? "change of MAC address passed for a wrong MAC" + ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for MAC address change" + + log_test "RIF - bad MAC change" + + ip addr del dev $swp2 192.0.2.2/28 + ip addr del dev $swp1 192.0.2.1/28 + + ip link set dev $swp2 addr $swp2_mac + ip link set dev $swp1 addr $swp1_mac +} + +rif_inherit_bridge_addr_test() +{ + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. That prompts bridge address change, which + # should be vetoed, thus preventing the attachment. + ip link set dev d master br1 &>/dev/null + check_fail $? "Device with low MAC was permitted to attach a bridge with RIF" + ip link set dev d master br1 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge attach rejection" + + ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null + check_fail $? "Changing swp2's MAC address permitted" + ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge port MAC address change rejection" + + log_test "RIF - attach port with bad MAC to bridge" + + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +rif_non_inherit_bridge_addr_test() +{ + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev br1 addr $swp2_mac + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. Since the bridge address was set, it should + # work. + ip link set dev d master br1 &>/dev/null + check_err $? "Could not attach a device with low MAC to a bridge with RIF" + + # Port MAC address change should be allowed for a bridge with set MAC. + ip link set dev $swp2 addr 00:11:22:33:44:55 + check_err $? "Changing swp2's MAC address not permitted" + + log_test "RIF - attach port with bad MAC to bridge with set MAC" + + ip link set dev $swp2 addr $swp2_mac + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +vlan_interface_deletion_test() +{ + # Test that when a VLAN interface is deleted, its associated router + # interface (RIF) is correctly deleted and not leaked. See commit + # c360867ec46a ("mlxsw: spectrum: Delete RIF when VLAN device is + # removed") for more details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link del dev br0.10 + + # If we leaked the previous RIF, then this should produce a trace + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:1::1/64 dev br0.20 + ip link del dev br0.20 + + log_test "vlan interface deletion" + + ip link del dev br0 +} + +bridge_deletion_test() +{ + # Test that when a bridge with VLAN interfaces is deleted, we correctly + # delete the associated RIFs. See commit 602b74eda813 ("mlxsw: + # spectrum_switchdev: Do not leak RIFs when removing bridge") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + ip -6 address add 2001:db8::1/64 dev br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:2::1/64 dev br0.20 + + ip link del dev br0 + + # If we leaked previous RIFs, then this should produce a trace + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "bridge deletion" +} + +bridge_vlan_flags_test() +{ + # Test that when bridge VLAN flags are toggled, we do not take + # unnecessary references on related structs. See commit 9e25826ffc94 + # ("mlxsw: spectrum_switchdev: Fix port_vlan refcounting") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 10 dev $swp1 untagged + bridge vlan add vid 10 dev $swp1 pvid + bridge vlan add vid 10 dev $swp1 + ip link del dev br0 + + # If we did not handle references correctly, then this should produce a + # trace + devlink dev reload "$DEVLINK_DEV" + + # Allow netdevices to be re-created following the reload + sleep 20 + + log_test "bridge vlan flags" +} + +vlan_1_test() +{ + # Test that VLAN 1 can be configured over mlxsw ports. In the past it + # was used internally for untagged traffic. See commit 47bf9df2e820 + # ("mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG") for more + # details + RET=0 + + ip link add link $swp1 name $swp1.1 type vlan id 1 + check_err $? "did not manage to create vlan 1 when should" + + log_test "vlan 1" + + ip link del dev $swp1.1 +} + +lag_bridge_upper_test() +{ + # Test that ports cannot be enslaved to LAG devices that have uppers + # and that failure is handled gracefully. See commit b3529af6bb0d + # ("spectrum: Reference count VLAN entries") for more details + RET=0 + + ip link add name bond1 type bond mode 802.3ad + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 &> /dev/null + check_fail $? "managed to enslave port to lag when should not" + + # This might generate a trace, if we did not handle the failure + # correctly + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "lag with bridge upper" + + ip link del dev br0 + ip link del dev bond1 +} + +duplicate_vlans_test() +{ + # Test that on a given port a VLAN is only used once. Either as VLAN + # in a VLAN-aware bridge or as a VLAN device + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + ip link add link $swp1 name $swp1.10 type vlan id 10 &> /dev/null + check_fail $? "managed to create vlan device when should not" + + bridge vlan del vid 10 dev $swp1 + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_err $? "did not manage to create vlan device when should" + bridge vlan add vid 10 dev $swp1 &> /dev/null + check_fail $? "managed to add bridge vlan when should not" + + log_test "duplicate vlans" + + ip link del dev $swp1.10 + ip link del dev br0 +} + +vlan_rif_refcount_test() +{ + # Test that RIFs representing VLAN interfaces are not affected from + # ports member in the VLAN. We use the offload indication on routes + # configured on the RIF to understand if it was created / destroyed + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link set dev $swp1 up + ip link set dev br0 up + + ip link add link br0 name br0.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was not created before adding port to vlan" + + bridge vlan add vid 10 dev $swp1 + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was destroyed after adding port to vlan" + + bridge vlan del vid 10 dev $swp1 + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was destroyed after removing port from vlan" + + ip link set dev $swp1 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_fail $? "vlan rif was not destroyed after unlinking port from bridge" + + log_test "vlan rif refcount" + + ip link del dev br0.10 + ip link set dev $swp1 down + ip link del dev br0 +} + +subport_rif_refcount_test() +{ + # Test that RIFs representing upper devices of physical ports are + # reference counted correctly and destroyed when should. We use the + # offload indication on routes configured on the RIF to understand if + # it was created / destroyed + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link set dev bond1 up + ip link add link bond1 name bond1.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev bond1 + ip -6 address add 2001:db8:2::1/64 dev bond1.10 + + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_err $? "subport rif was not created on lag device" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_err $? "subport rif was not created on vlan device" + + ip link set dev $swp1 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_err $? "subport rif of lag device was destroyed when should not" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_err $? "subport rif of vlan device was destroyed when should not" + + ip link set dev $swp2 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_fail $? "subport rif of lag device was not destroyed when should" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_fail $? "subport rif of vlan device was not destroyed when should" + + log_test "subport rif refcount" + + ip link del dev bond1.10 + ip link del dev bond1 +} + +vlan_dev_deletion_test() +{ + # Test that VLAN devices are correctly deleted / unlinked when enslaved + # to bridge + RET=0 + + ip link add name br10 type bridge + ip link add name br20 type bridge + ip link add name br30 type bridge + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip link add link $swp1 name $swp1.30 type vlan id 30 + ip link set dev $swp1.10 master br10 + ip link set dev $swp1.20 master br20 + ip link set dev $swp1.30 master br30 + + # If we did not handle the situation correctly, then these operations + # might produce a trace + ip link set dev $swp1.30 nomaster + ip link del dev $swp1.20 + # Deletion via ioctl uses different code paths from netlink + vconfig rem $swp1.10 &> /dev/null + + log_test "vlan device deletion" + + ip link del dev $swp1.30 + ip link del dev br30 + ip link del dev br20 + ip link del dev br10 +} + +lag_create() +{ + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link add link bond1 name bond1.10 type vlan id 10 + ip link add link bond1 name bond1.20 type vlan id 20 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link add name br10 type bridge + ip link set dev bond1.10 master br10 + + ip link add name br20 type bridge + ip link set dev bond1.20 master br20 +} + +lag_unlink_slaves_test() +{ + # Test that ports are correctly unlinked from their LAG master, when + # the LAG and its VLAN uppers are enslaved to bridges + RET=0 + + lag_create + + ip link set dev $swp1 nomaster + check_err $? "lag slave $swp1 was not unlinked from master" + ip link set dev $swp2 nomaster + check_err $? "lag slave $swp2 was not unlinked from master" + + # Try to configure corresponding VLANs as router interfaces + ip -6 address add 2001:db8:1::1/64 dev $swp1 + check_err $? "failed to configure ip address on $swp1" + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip -6 address add 2001:db8:10::1/64 dev $swp1.10 + check_err $? "failed to configure ip address on $swp1.10" + + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip -6 address add 2001:db8:20::1/64 dev $swp1.20 + check_err $? "failed to configure ip address on $swp1.20" + + log_test "lag slaves unlinking" + + ip link del dev $swp1.20 + ip link del dev $swp1.10 + ip address flush dev $swp1 + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 + ip link del dev bond1 +} + +lag_dev_deletion_test() +{ + # Test that LAG device is correctly deleted, when the LAG and its VLAN + # uppers are enslaved to bridges + RET=0 + + lag_create + + ip link del dev bond1 + + log_test "lag device deletion" + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 +} + +vlan_interface_uppers_test() +{ + # Test that uppers of a VLAN interface are correctly sanitized + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip link add link br0.10 name macvlan0 \ + type macvlan mode private &> /dev/null + check_fail $? "managed to create a macvlan when should not" + + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link add link br0.10 name macvlan0 type macvlan mode private + check_err $? "did not manage to create a macvlan when should" + + ip link del dev macvlan0 + + ip link add name vrf-test type vrf table 10 + ip link set dev br0.10 master vrf-test + check_err $? "did not manage to enslave vlan interface to vrf" + ip link del dev vrf-test + + ip link add name br-test type bridge + ip link set dev br0.10 master br-test &> /dev/null + check_fail $? "managed to enslave vlan interface to bridge when should not" + ip link del dev br-test + + log_test "vlan interface uppers" + + ip link del dev br0 +} + +devlink_reload_test() +{ + # Test that after executing all the above configuration tests, a + # devlink reload can be performed without errors + RET=0 + + devlink dev reload "$DEVLINK_DEV" + check_err $? "devlink reload failed" + + log_test "devlink reload - last test" + + sleep 20 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 3b75180f455d..b41d6256b2d0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ - multiple_masks_test ctcam_edge_cases_test" + multiple_masks_test ctcam_edge_cases_test delta_simple_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -142,7 +143,7 @@ two_masks_test() tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ $tcflags dst_ip 192.0.2.2 action drop tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.0.0/16 action drop + $tcflags dst_ip 192.0.0.0/8 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -235,7 +236,7 @@ ctcam_two_atcam_masks_test() $tcflags dst_ip 192.0.2.2 action drop # Filter goes into A-TCAM tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.2.0/24 action drop + $tcflags dst_ip 192.0.0.0/16 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -324,6 +325,258 @@ ctcam_edge_cases_test() ctcam_no_atcam_masks_test } +tp_record() +{ + local tracepoint=$1 + local cmd=$2 + + perf record -q -e $tracepoint $cmd + return $? +} + +tp_check_hits() +{ + local tracepoint=$1 + local count=$2 + + perf_output=`perf script -F trace:event,trace` + hits=`echo $perf_output | grep "$tracepoint:" | wc -l` + if [[ "$count" -ne "$hits" ]]; then + return 1 + fi + return 0 +} + +delta_simple_test() +{ + # The first filter will create eRP, the second filter will fit into + # the first eRP with delta. Remove the first rule then and check that + # the eRP stays (referenced by the second filter). + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 1 + check_err $? "eRP was not created" + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 0 + check_err $? "eRP was incorrectly created" + tp_check_hits "objagg:objagg_obj_parent_assign" 1 + check_err $? "delta was not created" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 1 handle 101 flower" + tp_check_hits "objagg:objagg_obj_root_destroy" 0 + check_err $? "eRP was incorrectly destroyed" + tp_check_hits "objagg:objagg_obj_parent_unassign" 0 + check_err $? "delta was incorrectly destroyed" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after the first was removed" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 2 handle 102 flower" + tp_check_hits "objagg:objagg_obj_parent_unassign" 1 + check_err $? "delta was not destroyed" + tp_check_hits "objagg:objagg_obj_root_destroy" 1 + check_err $? "eRP was not destroyed" + + log_test "delta simple test ($tcflags)" +} + +bloom_simple_test() +{ + # Bloom filter requires that the eRP table is used. This test + # verifies that Bloom filter is not harming correctness of ACLs. + # First, make sure that eRP table is used and then set rule patterns + # which are distant enough and will result skipping a lookup after + # consulting the Bloom filter. Although some eRP lookups are skipped, + # the correct filter should be hit. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Single filter - did not match" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Low prio filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower + + log_test "bloom simple test ($tcflags)" +} + +bloom_complex_test() +{ + # Bloom filter index computation is affected from region ID, eRP + # ID and from the region key size. In order to excercise those parts + # of the Bloom filter code, use a series of regions, each with a + # different key size and send packet that should hit all of them. + local index + + RET=0 + NUM_CHAINS=4 + BASE_INDEX=100 + + # Create chain with up to 2 key blocks (ip_proto only) + tc chain add dev $h2 ingress chain 1 protocol ip flower \ + ip_proto tcp &> /dev/null + # Create chain with 2-4 key blocks (ip_proto, src MAC) + tc chain add dev $h2 ingress chain 2 protocol ip flower \ + ip_proto tcp \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null + # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest) + tc chain add dev $h2 ingress chain 3 protocol ip flower \ + ip_proto tcp \ + dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + dst_ip 0.0.0.0/32 &> /dev/null + # Default chain contains all fields and therefore is 8-12 key blocks + tc chain add dev $h2 ingress chain 4 + + # We need at least 2 rules in every region to have eRP table active + # so create a dummy rule per chain using a different pattern + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX - 1 - i)) + tc filter add dev $h2 ingress chain $i protocol ip \ + pref 2 handle $index flower \ + $tcflags ip_proto tcp action drop + done + + # Add rules to test Bloom filter, each in a different chain + index=$BASE_INDEX + tc filter add dev $h2 ingress protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/16 action goto chain 1 + tc filter add dev $h2 ingress chain 1 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags action goto chain 2 + tc filter add dev $h2 ingress chain 2 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_mac $h1mac action goto chain 3 + tc filter add dev $h2 ingress chain 3 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/8 action goto chain 4 + tc filter add dev $h2 ingress chain 4 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + # Send a packet that is supposed to hit all chains + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX + i + 1)) + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "Did not match chain $i" + done + + # Rules cleanup + for i in $(eval echo {$NUM_CHAINS..0}); do + index=$((BASE_INDEX - i - 1)) + tc filter del dev $h2 ingress chain $i \ + pref 2 handle $index flower + index=$((BASE_INDEX + i + 1)) + tc filter del dev $h2 ingress chain $i \ + pref 1 handle $index flower + done + + # Chains cleanup + for i in $(eval echo {$NUM_CHAINS..1}); do + tc chain del dev $h2 ingress chain $i + done + + log_test "bloom complex test ($tcflags)" +} + + +bloom_delta_test() +{ + # When multiple masks are used, the eRP table is activated. When + # masks are close enough (delta) the masks reside on the same + # eRP table. This test verifies that the eRP table is correctly + # allocated and used in delta condition and that Bloom filter is + # still functional with delta. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.1.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.2.1.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Delta filters - did not match second filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "bloom delta test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh new file mode 100755 index 000000000000..dcf9f4e913e0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -0,0 +1,1103 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various aspects of VxLAN offloading which are specific to mlxsw, such +# as sanitization of invalid configurations and offload indication. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="sanitization_test offload_indication_test \ + sanitization_vlan_aware_test offload_indication_vlan_aware_test" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +sanitization_single_dev_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? +} + +sanitization_single_dev_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 &> /dev/null + check_fail $? + + ip link set dev $swp1 nomaster + + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? +} + +sanitization_single_dev_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device - valid configuration" +} + +sanitization_single_dev_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a vlan-aware bridge" +} + +sanitization_single_dev_mcast_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast enabled bridge" +} + +sanitization_single_dev_mcast_group_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + dev $swp2 group 239.0.0.1 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast group" +} + +sanitization_single_dev_no_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with no local ip" +} + +sanitization_single_dev_local_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 2001:db8::1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local ipv6 address" +} + +sanitization_single_dev_learning_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_local_interface_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local interface" +} + +sanitization_single_dev_port_range_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + srcport 4000 5000 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp source port range" +} + +sanitization_single_dev_tos_static_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos 20 local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with static tos" +} + +sanitization_single_dev_ttl_inherit_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl inherit tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with inherit ttl" +} + +sanitization_single_dev_udp_checksum_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp checksum" +} + +sanitization_single_dev_test() +{ + # These tests make sure that we correctly sanitize VxLAN device + # configurations we do not support + sanitization_single_dev_valid_test + sanitization_single_dev_vlan_aware_test + sanitization_single_dev_mcast_enabled_test + sanitization_single_dev_mcast_group_test + sanitization_single_dev_no_local_ip_test + sanitization_single_dev_local_ipv6_test + sanitization_single_dev_learning_enabled_test + sanitization_single_dev_local_interface_test + sanitization_single_dev_port_range_test + sanitization_single_dev_tos_static_test + sanitization_single_dev_ttl_inherit_test + sanitization_single_dev_udp_checksum_test +} + +sanitization_multi_devs_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 + check_err $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? +} + +sanitization_multi_devs_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 &> /dev/null + check_fail $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev vxlan1 master br1 + check_err $? + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 &> /dev/null + check_fail $? +} + +sanitization_multi_devs_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_pass + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices - valid configuration" +} + +sanitization_multi_devs_ttl_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 40 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different ttl" +} + +sanitization_multi_devs_udp_dstport_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 5789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different udp destination port" +} + +sanitization_multi_devs_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.2 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different local ip" +} + +sanitization_multi_devs_test() +{ + # The device has a single VTEP, which means all the VxLAN devices + # we offload must share certain properties such as source IP and + # UDP destination port. These tests make sure that we forbid + # configurations that violate this limitation + sanitization_multi_devs_valid_test + sanitization_multi_devs_ttl_test + sanitization_multi_devs_udp_dstport_test + sanitization_multi_devs_local_ip_test +} + +sanitization_test() +{ + sanitization_single_dev_test + sanitization_multi_devs_test +} + +offload_indication_setup_create() +{ + # Create a simple setup with two bridges, each with a VxLAN device + # and one local port + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +offload_indication_setup_destroy() +{ + ip link del dev vxlan1 + ip link del dev vxlan0 + + ip address del 198.51.100.1/32 dev lo + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link del dev br1 + ip link del dev br0 +} + +offload_indication_fdb_flood_test() +{ + RET=0 + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \ + | grep -q offload + check_err $? + + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self + + log_test "vxlan flood entry offload indication" +} + +offload_indication_fdb_bridge_test() +{ + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ + dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master +} + +offload_indication_fdb_test() +{ + offload_indication_fdb_flood_test + offload_indication_fdb_bridge_test +} + +offload_indication_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan1 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device down" + + RET=0 + + ip link set dev vxlan1 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vxlan device up" + + RET=0 + + ip address delete 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip address add 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - add local route" + + RET=0 + + ip link set dev $swp1 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev $swp2 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - local ports enslavement" + + RET=0 + + ip link del dev br0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - bridge device deletion" + + RET=0 + + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip link set dev vxlan0 master br0 + ip link set dev vxlan1 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device deletion" + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +check_fdb_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_err $? + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? +} + +check_vxlan_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_fail $? + + bridge fdb show dev vxlan0 | grep $zmac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? +} + +check_bridge_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q master + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_fail $? +} + +__offload_indication_join_vxlan_first() +{ + local vid=$1; shift + + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev vxlan0 master br0 + bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 + + RET=0 + check_vxlan_fdb_not_offloaded + ip link set dev $swp1 master br0 + sleep .1 + check_fdb_offloaded + log_test "offload indication - attach vxlan first" + + RET=0 + ip link set dev vxlan0 down + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - set vxlan down" + + RET=0 + ip link set dev vxlan0 up + sleep .1 + check_fdb_offloaded + log_test "offload indication - set vxlan up" + + if [[ ! -z $vid ]]; then + RET=0 + bridge vlan del dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - delete VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - add tagged VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid pvid untagged + sleep .1 + check_fdb_offloaded + log_test "offload indication - add pvid/untagged VLAN" + fi + + RET=0 + ip link set dev $swp1 nomaster + check_vxlan_fdb_not_offloaded + log_test "offload indication - detach port" +} + +offload_indication_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first + + ip link del dev vxlan0 + ip link del dev br0 +} + +__offload_indication_join_vxlan_last() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev $swp1 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? + + ip link set dev vxlan0 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? + + log_test "offload indication - attach vxlan last" +} + +offload_indication_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_test() +{ + offload_indication_setup_create + offload_indication_fdb_test + offload_indication_decap_route_test + offload_indication_setup_destroy + + log_info "offload indication - replay & cleanup" + offload_indication_join_vxlan_first + offload_indication_join_vxlan_last +} + +sanitization_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + # Test that when each VNI is mapped to a different VLAN we can enslave + # a port to the bridge + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 + check_err $? + + log_test "vlan-aware - enslavement to vlan-aware bridge" + + # Try to map both VNIs to the same VLAN and make sure configuration + # fails + RET=0 + + bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null + check_fail $? + + log_test "vlan-aware - two vnis mapped to the same vlan" + + # Test that enslavement of a port to a bridge fails when two VNIs + # are mapped to the same VLAN + RET=0 + + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vxlan20 pvid untagged + bridge vlan add vid 10 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed enslavement to vlan-aware bridge" + + ip link del dev vxlan20 + ip link del dev vxlan10 + ip link del dev br0 +} + +offload_indication_vlan_aware_setup_create() +{ + # Create a simple setup with two VxLAN devices and a single VLAN-aware + # bridge + ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \ + vlan_default_pvid 0 + + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged +} + +offload_indication_vlan_aware_setup_destroy() +{ + bridge vlan del vid 20 dev vxlan20 + bridge vlan del vid 10 dev vxlan10 + + ip link del dev vxlan20 + ip link del dev vxlan10 + + ip address del 198.51.100.1/32 dev lo + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ip link set dev $swp1 nomaster + + ip link del dev br0 +} + +offload_indication_vlan_aware_fdb_test() +{ + RET=0 + + log_info "vxlan entry offload indication - vlan-aware" + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ + dst 198.51.100.2 vlan 10 + + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self master vlan 10 +} + +offload_indication_vlan_aware_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on one VxLAN device and make sure route is still + # marked as offloaded + bridge vlan add vid 10 dev vxlan10 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on second VxLAN device and make sure route is no + # longer marked as offloaded + bridge vlan add vid 20 dev vxlan20 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + # Toggle PVID flag back and make sure route is marked as offloaded + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vni map/unmap" +} + +offload_indication_vlan_aware_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first 1 + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_l3vni_test() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + sysctl_set net.ipv6.conf.default.disable_ipv6 1 + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link set dev $swp1 master br0 + + # The test will use the offload indication on the FDB entry to + # understand if the tunnel is offloaded or not + bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1 + + ip link set dev vxlan0 master br0 + bridge vlan add dev vxlan0 vid 10 pvid untagged + + # No local port or router port is member in the VLAN, so tunnel should + # not be offloaded + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded when should not" + + # Configure a VLAN interface and make sure tunnel is offloaded + ip link add link br0 name br10 up type vlan id 10 + sysctl_set net.ipv6.conf.br10.disable_ipv6 0 + ip -6 address add 2001:db8:1::1/64 dev br10 + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_err $? "vxlan tunnel not offloaded when should" + + # Unlink the VXLAN device, make sure tunnel is no longer offloaded, + # then add it back to the bridge and make sure it is offloaded + ip link set dev vxlan0 nomaster + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded after unlinked from bridge" + + ip link set dev vxlan0 master br0 + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded despite no matching vid" + + bridge vlan add dev vxlan0 vid 10 pvid untagged + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_err $? "vxlan tunnel not offloaded after adding vid" + + log_test "vxlan - l3 vni" + + ip link del dev vxlan0 + ip link del dev br0 + sysctl_restore net.ipv6.conf.default.disable_ipv6 +} + +offload_indication_vlan_aware_test() +{ + offload_indication_vlan_aware_setup_create + offload_indication_vlan_aware_fdb_test + offload_indication_vlan_aware_decap_route_test + offload_indication_vlan_aware_setup_destroy + + log_info "offload indication - replay & cleanup - vlan aware" + offload_indication_vlan_aware_join_vxlan_first + offload_indication_vlan_aware_join_vxlan_last + offload_indication_vlan_aware_l3vni_test +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh new file mode 100755 index 000000000000..fedcb7b35af9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -0,0 +1,309 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to three IPv4 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 203.0.113.1/24| +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 198.51.100.1 | | +# | | remote 198.51.100.{2..13} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 198.51.100.0/24 via 192.0.2.2 | +# | | +# | + $rp1 | +# | | 192.0.2.1/24 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 203.0.113.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 203.0.113.1/24 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip address add 198.51.100.1/32 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 198.51.100.1/32 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 192.0.2.1/24 dev $rp1 + ip route add 198.51.100.0/24 via 192.0.2.2 +} + +router1_destroy() +{ + ip route del 198.51.100.0/24 via 192.0.2.2 + ip address del 192.0.2.1/24 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 192.0.2.2/24 +} + +router2_destroy() +{ + simple_if_fini $rp2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 198.51.100.$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ip pref $i handle $i \ + flower ip_proto udp dst_ip 198.51.100.$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ip pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 12 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=12 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 8..10 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 1 1 1 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.8 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.9 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.10 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 3 3 3 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.4 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 4 4 4 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 4 5 5 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.6 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.7 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 8cf22b3c2563..6f81130605d7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,6 +3,7 @@ socket psock_fanout psock_snd psock_tpacket +reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa @@ -14,4 +15,5 @@ udpgso_bench_rx udpgso_bench_tx tcp_inq tls +txring_overwrite ip_defrag diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index eec359895feb..f8f3e90700c0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,14 +4,16 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ + rtnetlink.sh xfrm_policy.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh +TEST_PROGS += test_vxlan_fdb_changelink.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd +TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any +TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cd3a2f1545b5..5821bdd98d20 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -14,3 +14,17 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_NAT_IPV6=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV4=m diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 85d253546684..3f248d1f5b91 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -15,6 +15,8 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} +MCD=${MCD:=smcrouted} +MC_CLI=${MC_CLI:=smcroutectl} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -104,7 +106,7 @@ create_netif_veth() { local i - for i in $(eval echo {1..$NUM_NETIFS}); do + for ((i = 1; i <= NUM_NETIFS; ++i)); do local j=$((i+1)) ip link show dev ${NETIFS[p$i]} &> /dev/null @@ -135,7 +137,7 @@ if [[ "$NETIF_CREATE" = "yes" ]]; then create_netif fi -for i in $(eval echo {1..$NUM_NETIFS}); do +for ((i = 1; i <= NUM_NETIFS; ++i)); do ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" @@ -477,11 +479,24 @@ master_name_get() ip -j link show dev $if_name | jq -r '.[]["master"]' } +link_stats_get() +{ + local if_name=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -j -s link show dev $if_name \ + | jq '.[]["stats64"]["'$dir'"]["'$stat'"]' +} + link_stats_tx_packets_get() { - local if_name=$1 + link_stats_get $1 tx packets +} - ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +link_stats_rx_errors_get() +{ + link_stats_get $1 rx errors } tc_rule_stats_get() @@ -783,6 +798,17 @@ multipath_eval() log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" } +in_ns() +{ + local name=$1; shift + + ip netns exec $name bash <<-EOF + NUM_NETIFS=0 + source lib.sh + $(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done) + EOF +} + ############################################################################## # Tests @@ -790,10 +816,11 @@ ping_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping_test() @@ -802,17 +829,18 @@ ping_test() ping_do $1 $2 check_err $? - log_test "ping" + log_test "ping$3" } ping6_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING6 $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping6_test() @@ -821,7 +849,7 @@ ping6_test() ping6_do $1 $2 check_err $? - log_test "ping6" + log_test "ping6$3" } learning_test() diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh new file mode 100755 index 000000000000..109e6d785169 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +------------------+ +# | H1 (v$h1) | +# | 2001:db8:1::2/64 | +# | 198.51.100.2/28 | +# | $h1 + | +# +-------------|----+ +# | +# +-------------|-------------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/28 | +# | 2001:db8:1::1/64 | +# | | +# | 2001:db8:2::1/64 2001:db8:3::1/64 | +# | 198.51.100.17/28 198.51.100.33/28 | +# | $rp2 + $rp3 + | +# +--------------|--------------------------|---+ +# | | +# | | +# +--------------|---+ +--------------|---+ +# | H2 (v$h2) | | | H3 (v$h3) | | +# | $h2 + | | $h3 + | +# | 198.51.100.18/28 | | 198.51.100.34/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# +------------------+ +------------------+ +# + +ALL_TESTS="mcast_v4 mcast_v6" +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests + +h1_create() +{ + simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 + + ip route add 198.51.100.16/28 vrf v$h1 nexthop via 198.51.100.1 + ip route add 198.51.100.32/28 vrf v$h1 nexthop via 198.51.100.1 + + ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1 + ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:3::/64 vrf v$h1 + ip route del 2001:db8:2::/64 vrf v$h1 + + ip route del 198.51.100.32/28 vrf v$h1 + ip route del 198.51.100.16/28 vrf v$h1 + + simple_if_fini $h1 198.51.100.2/28 2001:db8:1::2/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.18/28 2001:db8:2::2/64 + + ip route add 198.51.100.0/28 vrf v$h2 nexthop via 198.51.100.17 + ip route add 198.51.100.32/28 vrf v$h2 nexthop via 198.51.100.17 + + ip route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:2::1 + + tc qdisc add dev $h2 ingress +} + +h2_destroy() +{ + tc qdisc del dev $h2 ingress + + ip route del 2001:db8:3::/64 vrf v$h2 + ip route del 2001:db8:1::/64 vrf v$h2 + + ip route del 198.51.100.32/28 vrf v$h2 + ip route del 198.51.100.0/28 vrf v$h2 + + simple_if_fini $h2 198.51.100.18/28 2001:db8:2::2/64 +} + +h3_create() +{ + simple_if_init $h3 198.51.100.34/28 2001:db8:3::2/64 + + ip route add 198.51.100.0/28 vrf v$h3 nexthop via 198.51.100.33 + ip route add 198.51.100.16/28 vrf v$h3 nexthop via 198.51.100.33 + + ip route add 2001:db8:1::/64 vrf v$h3 nexthop via 2001:db8:3::1 + ip route add 2001:db8:2::/64 vrf v$h3 nexthop via 2001:db8:3::1 + + tc qdisc add dev $h3 ingress +} + +h3_destroy() +{ + tc qdisc del dev $h3 ingress + + ip route del 2001:db8:2::/64 vrf v$h3 + ip route del 2001:db8:1::/64 vrf v$h3 + + ip route del 198.51.100.16/28 vrf v$h3 + ip route del 198.51.100.0/28 vrf v$h3 + + simple_if_fini $h3 198.51.100.34/28 2001:db8:3::2/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + ip link set dev $rp3 up + + ip address add 198.51.100.1/28 dev $rp1 + ip address add 198.51.100.17/28 dev $rp2 + ip address add 198.51.100.33/28 dev $rp3 + + ip address add 2001:db8:1::1/64 dev $rp1 + ip address add 2001:db8:2::1/64 dev $rp2 + ip address add 2001:db8:3::1/64 dev $rp3 +} + +router_destroy() +{ + ip address del 2001:db8:3::1/64 dev $rp3 + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 2001:db8:1::1/64 dev $rp1 + + ip address del 198.51.100.33/28 dev $rp3 + ip address del 198.51.100.17/28 dev $rp2 + ip address del 198.51.100.1/28 dev $rp1 + + ip link set dev $rp3 down + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + start_mcd + + vrf_prepare + + h1_create + h2_create + h3_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + kill_mcd +} + +create_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs +} + +delete_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs +} + +mcast_v4() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 122 flower \ + dst_ip 225.1.2.3 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 133 flower \ + dst_ip 225.1.2.3 action drop + + create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ip pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 122 flower + + log_test "mcast IPv4" +} + +mcast_v6() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 122 flower \ + dst_ip ff0e::3 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 133 flower \ + dst_ip ff0e::3 action drop + + create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on first host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 122 flower + + log_test "mcast IPv6" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh new file mode 100755 index 000000000000..a7306c7ac06d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="ping_ipv4 ping_ipv6" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev vrf-h1 up + + ip link set dev $h1 up + vlan_create $h1 1 vrf-h1 192.0.2.2/24 2001:db8:1::2/64 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + vlan_destroy $h1 1 + ip link set dev $h1 down + + ip link set dev vrf-h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev vrf-h2 up + + ip link set dev $h2 up + vlan_create $h2 1 vrf-h2 198.51.100.2/24 2001:db8:2::2/64 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + vlan_destroy $h2 1 + ip link set dev $h2 down + + ip link set dev vrf-h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link add link $rp1 name $rp1.1 up type vlan id 1 + + ip address add 192.0.2.1/24 dev $rp1.1 + ip address add 2001:db8:1::1/64 dev $rp1.1 + + ip link set dev $rp2 up + ip link add link $rp2 name $rp2.1 up type vlan id 1 + + ip address add 198.51.100.1/24 dev $rp2.1 + ip address add 2001:db8:2::1/64 dev $rp2.1 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2.1 + ip address del 198.51.100.1/24 dev $rp2.1 + + ip link del dev $rp2.1 + ip link set dev $rp2 down + + ip address del 2001:db8:1::1/64 dev $rp1.1 + ip address del 192.0.2.1/24 dev $rp1.1 + + ip link del dev $rp1.1 + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1.1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh new file mode 100755 index 000000000000..56cef3b1c194 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -0,0 +1,786 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + test_ttl + test_tos + test_ecn_encap + test_ecn_decap + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx1 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + rp1_unset_addr + ip link set dev $rp1 down + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr dstport "$VXPORT" + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx1 is the first device to get attached to the +# bridge, and that at the point that the local IP is already configured. Try the +# other scenario of attaching the device to an already-offloaded bridge, and +# only then attach the local IP. +reapply_config() +{ + echo "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + rp1_unset_addr + ip link set dev vx1 nomaster + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + sleep 1 + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 ": local->local" + ping_test $h1 192.0.2.3 ": local->remote 1" + ping_test $h1 192.0.2.4 ": local->remote 2" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 192.0.2.34" + "$r2_mac vx1 192.0.2.50") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 192.0.2.2 0 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 192.0.2.3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TTL" +} + +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos 0x40 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TOS inheritance" +} + +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos $tos action pass + sleep 1 + vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"$inner_tos:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request sequence number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 prot ip \ + flower ip_tos $decapped_tos action pass + sleep 1 + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos=00 + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap 00 00 0x00 + __test_ecn_decap 01 01 0x01 + __test_ecn_decap 02 01 0x02 + __test_ecn_decap 01 03 0x03 + __test_ecn_decap 02 03 0x03 + test_ecn_decap_error +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + + # Enable learning on the VxLAN device and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx1 type vxlan ageing 10 + ip link set dev vx1 type vxlan learning + reapply_config + + # Check that flooding works + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in VxLAN device vx1 + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + vxlan_flood_test $mac $dst 0 10 0 + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev vx1 $mac master self + sleep 1 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + vxlan_flood_test $mac $dst 0 10 0 + + sleep 20 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_fail $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev vx1 type bridge_slave learning off + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + ip link set dev vx1 type bridge_slave learning on + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" + + # Restore previous settings + ip link set dev vx1 type vxlan nolearning + ip link set dev vx1 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh new file mode 100755 index 000000000000..3bf3da69195f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1d.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh new file mode 100755 index 000000000000..a5789721ba92 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -0,0 +1,860 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local 192.0.2.17 local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + test_pvid + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 + vlan_create $h1 20 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx10 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/28 + vlan_create w2 20 vw2 $host_addr2/24 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 \ + 198.51.100.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 \ + 198.51.100.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and that at the point that the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + ping_test $h1.10 192.0.2.3 ": local->remote 1 vid 10" + ping_test $h1.10 192.0.2.4 ": local->remote 2 vid 10" + ping_test $h1.20 198.51.100.3 ": local->remote 1 vid 20" + ping_test $h1.20 198.51.100.4 ": local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 192.0.2.34" + "$r2_mac vx10 192.0.2.50") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 192.0.2.2 0 10 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 192.0.2.34" \ + "$r2_mac vx20 192.0.2.50") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 198.51.100.2 0 20 "local MAC unicast" + __test_unicast $r1_mac 198.51.100.3 2 20 "remote MAC 1 unicast" + __test_unicast $r2_mac 198.51.100.4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +__test_learning() +{ + local -a expects=(0 0 0 0 0) + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local idx1=$1; shift + local idx2=$1; shift + local vx=vx$vid + + # Check that flooding works + RET=0 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in the VxLAN device + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev $vx $mac master self vlan $vid + sleep 1 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + sleep 20 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_fail $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev $vx type bridge_slave learning off + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + ip link set dev $vx type bridge_slave learning on + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Enable learning on the VxLAN devices and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx10 type vxlan ageing 10 + ip link set dev vx10 type vxlan learning + ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan learning + reapply_config + + log_info "learning vlan 10" + + __test_learning $mac $dst $vid 1 3 + + log_info "learning vlan 20" + + mac=ca:fe:be:ef:13:37 + dst=198.51.100.100 + vid=20 + + __test_learning $mac $dst $vid 2 4 + + # Restore previous settings + ip link set dev vx20 type vxlan nolearning + ip link set dev vx20 type vxlan ageing 300 + ip link set dev vx10 type vxlan nolearning + ip link set dev vx10 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh new file mode 100755 index 000000000000..b1b2d1a3164f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1q.sh diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 406cc70c571d..4b02933cab8a 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -651,12 +651,13 @@ static void do_flush_datagram(int fd, int type) static void do_rx(int domain, int type, int protocol) { + const int cfg_receiver_wait_ms = 400; uint64_t tstop; int fd; fd = do_setup_rx(domain, type, protocol); - tstop = gettimeofday_ms() + cfg_runtime_ms; + tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms; do { if (type == SOCK_STREAM) do_flush_tcp(fd); diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index c43c6debda06..825ffec85cea 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -25,6 +25,8 @@ readonly path_sysctl_mem="net.core.optmem_max" if [[ "$#" -eq "0" ]]; then $0 4 tcp -t 1 $0 6 tcp -t 1 + $0 4 udp -t 1 + $0 6 udp -t 1 echo "OK. All tests passed" exit 0 fi diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c new file mode 100644 index 000000000000..6f54d425dba9 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test that sockets listening on a specific address are preferred + * over sockets listening on addr_any. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/dccp.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const char *IP4_ADDR = "127.0.0.1"; +static const char *IP6_ADDR = "::1"; +static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; + +static const int PORT = 8888; + +static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, + const char *addr_str) +{ + struct sockaddr_in addr4 = {0}; + struct sockaddr_in6 addr6 = {0}; + struct sockaddr *addr; + int opt, i, sz; + + memset(&addr, 0, sizeof(addr)); + + switch (family) { + case AF_INET: + addr4.sin_family = family; + if (!addr_str) + addr4.sin_addr.s_addr = htonl(INADDR_ANY); + else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr4.sin_port = htons(PORT); + sz = sizeof(addr4); + addr = (struct sockaddr *)&addr4; + break; + case AF_INET6: + addr6.sin6_family = AF_INET6; + if (!addr_str) + addr6.sin6_addr = in6addr_any; + else if (!inet_pton(family, addr_str, &addr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr6.sin6_port = htons(PORT); + sz = sizeof(addr6); + addr = (struct sockaddr *)&addr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < count; ++i) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fds[i], addr, sz)) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "tcp: failed to listen on receive port"); + else if (proto == SOCK_DCCP) { + if (setsockopt(rcv_fds[i], SOL_DCCP, + DCCP_SOCKOPT_SERVICE, + &(int) {htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (listen(rcv_fds[i], 10)) + error(1, errno, "dccp: failed to listen on receive port"); + } + } +} + +static int connect_and_send(int family, int proto) +{ + struct sockaddr_in saddr4 = {0}; + struct sockaddr_in daddr4 = {0}; + struct sockaddr_in6 saddr6 = {0}; + struct sockaddr_in6 daddr6 = {0}; + struct sockaddr *saddr, *daddr; + int fd, sz; + + switch (family) { + case AF_INET: + saddr4.sin_family = AF_INET; + saddr4.sin_addr.s_addr = htonl(INADDR_ANY); + saddr4.sin_port = 0; + + daddr4.sin_family = AF_INET; + if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", IP4_ADDR); + daddr4.sin_port = htons(PORT); + + sz = sizeof(saddr4); + saddr = (struct sockaddr *)&saddr4; + daddr = (struct sockaddr *)&daddr4; + break; + case AF_INET6: + saddr6.sin6_family = AF_INET6; + saddr6.sin6_addr = in6addr_any; + + daddr6.sin6_family = AF_INET6; + if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", IP6_ADDR); + daddr6.sin6_port = htons(PORT); + + sz = sizeof(saddr6); + saddr = (struct sockaddr *)&saddr6; + daddr = (struct sockaddr *)&daddr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (proto == SOCK_DCCP && + setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, + &(int){htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (bind(fd, saddr, sz)) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, daddr, sz)) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + return fd; +} + +static int receive_once(int epfd, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, 3); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + return ev.data.fd; +} + +static void test(int *rcv_fds, int count, int family, int proto, int fd) +{ + struct epoll_event ev; + int epfd, i, send_fd, recv_fd; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + + ev.events = EPOLLIN; + for (i = 0; i < count; ++i) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + send_fd = connect_and_send(family, proto); + + recv_fd = receive_once(epfd, proto); + if (recv_fd != fd) + error(1, 0, "received on an unexpected socket"); + + close(send_fd); + close(epfd); +} + + +static void run_one_test(int fam_send, int fam_rcv, int proto, + const char *addr_str) +{ + /* Below we test that a socket listening on a specific address + * is always selected in preference over a socket listening + * on addr_any. Bugs where this is not the case often result + * in sockets created first or last to get picked. So below + * we make sure that there are always addr_any sockets created + * before and after a specific socket is created. + */ + int rcv_fds[10], i; + + build_rcv_fd(AF_INET, proto, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, proto, rcv_fds + 2, 2, NULL); + build_rcv_fd(fam_rcv, proto, rcv_fds + 4, 1, addr_str); + build_rcv_fd(AF_INET, proto, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, proto, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, fam_send, proto, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + fprintf(stderr, "pass\n"); +} + +static void test_proto(int proto, const char *proto_str) +{ + if (proto == SOCK_DCCP) { + int test_fd; + + test_fd = socket(AF_INET, proto, 0); + if (test_fd < 0) { + if (errno == ESOCKTNOSUPPORT) { + fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); + return; + } else + error(1, errno, "failed to create a DCCP socket"); + } + close(test_fd); + } + + fprintf(stderr, "%s IPv4 ... ", proto_str); + run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); + + fprintf(stderr, "%s IPv6 ... ", proto_str); + run_one_test(AF_INET6, AF_INET6, proto, IP6_ADDR); + + fprintf(stderr, "%s IPv4 mapped to IPv6 ... ", proto_str); + run_one_test(AF_INET, AF_INET6, proto, IP4_MAPPED6); +} + +int main(void) +{ + test_proto(SOCK_DGRAM, "UDP"); + test_proto(SOCK_STREAM, "TCP"); + test_proto(SOCK_DCCP, "DCCP"); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh new file mode 100755 index 000000000000..104592f62ad4 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh ./reuseport_addr_any diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e101af52d1d6..78fc593dfe40 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -205,6 +205,8 @@ kci_test_polrouting() kci_test_route_get() { + local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) + ret=0 ip route get 127.0.0.1 > /dev/null @@ -223,6 +225,19 @@ kci_test_route_get() check_err $? ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null check_err $? + ip route add 10.23.8.0/24 \ + nexthop via 10.23.7.13 dev "$devdummy" \ + nexthop via 10.23.7.14 dev "$devdummy" + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=0 + ip route get 10.23.8.11 > /dev/null + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=1 + ip route get 10.23.8.11 > /dev/null + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy" + ip route del 10.23.8.0/24 + check_err $? ip addr del dev "$devdummy" 10.23.7.11/24 check_err $? @@ -955,6 +970,111 @@ kci_test_ip6erspan() ip netns del "$testns" } +kci_test_fdb_get() +{ + IP="ip -netns testns" + BRIDGE="bridge -netns testns" + brdev="test-br0" + vxlandev="vxlan10" + test_mac=de:ad:be:ef:13:37 + localip="10.0.2.2" + dstip="10.0.2.3" + ret=0 + + bridge fdb help 2>&1 |grep -q 'bridge fdb get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + ip netns add testns + if [ $? -ne 0 ]; then + echo "SKIP fdb get tests: cannot add net namespace $testns" + return $ksft_skip + fi + + $IP link add "$vxlandev" type vxlan id 10 local $localip \ + dstport 4789 2>/dev/null + check_err $? + $IP link add name "$brdev" type bridge &>/dev/null + check_err $? + $IP link set dev "$vxlandev" master "$brdev" &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null + check_err $? + + $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip" + check_err $? + + ip netns del testns &>/dev/null + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge fdb get" + return 1 + fi + + echo "PASS: bridge fdb get" +} + +kci_test_neigh_get() +{ + dstmac=de:ad:be:ef:13:37 + dstip=10.0.2.4 + dstip6=dead::2 + ret=0 + + ip neigh help 2>&1 |grep -q 'ip neigh get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + # ipv4 + ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac" + check_err $? + ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + + # ipv4 proxy + ip neigh add proxy $dstip dev "$devdummy" > /dev/null + check_err $? + ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip" + check_err $? + ip neigh del proxy $dstip dev "$devdummy" > /dev/null + check_err $? + + # ipv6 + ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac" + check_err $? + ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + + # ipv6 proxy + ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null + check_err $? + ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6" + check_err $? + ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: neigh get" + return 1 + fi + + echo "PASS: neigh get" +} + kci_test_rtnl() { kci_add_dummy @@ -979,6 +1099,8 @@ kci_test_rtnl() kci_test_macsec kci_test_ipsec kci_test_ipsec_offload + kci_test_fdb_get + kci_test_neigh_get kci_del_dummy } diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index bea079edc278..2dc95fda7ef7 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -25,3 +25,13 @@ if [ $? -ne 0 ]; then else echo "[PASS]" fi + +echo "--------------------" +echo "running txring_overwrite test" +echo "--------------------" +./in_netns.sh ./txring_overwrite +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh new file mode 100755 index 000000000000..2d442cdab11e --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Check FDB default-remote handling across "ip link set". + +check_remotes() +{ + local what=$1; shift + local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) + + echo -ne "expected two remotes after $what\t" + if [[ $N != 2 ]]; then + echo "[FAIL]" + EXIT_STATUS=1 + else + echo "[ OK ]" + fi +} + +ip link add name vx up type vxlan id 2000 dstport 4789 +bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent +bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent +check_remotes "fdb append" + +ip link set dev vx type vxlan remote 192.0.2.30 +check_remotes "link set" + +ip link del dev vx +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh new file mode 100755 index 000000000000..09f9ed92cbe4 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking VXLAN underlay in a non-default VRF. +# +# It simulates two hypervisors running a VM each using four network namespaces: +# two for the HVs, two for the VMs. +# A small VXLAN tunnel is made between the two hypervisors to have the two vms +# in the same virtual L2: +# +# +-------------------+ +-------------------+ +# | | | | +# | vm-1 netns | | vm-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth-hv | | | | veth-hv | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +------------------------------------+ +# | . | | . | +# | +----------+ | | +----------+ | +# | | veth-tap | | | | veth-tap | | +# | +----+-----+ | | +----+-----+ | +# | | | | | | +# | +--+--+ +--------------+ | | +--------------+ +--+--+ | +# | | br0 | | vrf-underlay | | | | vrf-underlay | | br0 | | +# | +--+--+ +-------+------+ | | +------+-------+ +--+--+ | +# | | | | | | | | +# | +---+----+ +-------+-------+ | | +-------+-------+ +---+----+ | +# | | vxlan0 |....| veth0 |.|...|.| veth0 |....| vxlan0 | | +# | +--------+ | 172.16.0.1/24 | | | | 172.16.0.2/24 | +--------+ | +# | +---------------+ | | +---------------+ | +# | | | | +# | hv-1 netns | | hv-2 netns | +# | | | | +# +-----------------------------------+ +------------------------------------+ +# +# This tests both the connectivity between vm-1 and vm-2, and that the underlay +# can be moved in and out of the vrf by unsetting and setting veth0's master. + +set -e + +cleanup() { + ip link del veth-hv-1 2>/dev/null || true + ip link del veth-tap 2>/dev/null || true + + for ns in hv-1 hv-2 vm-1 vm-2; do + ip netns del $ns || true + done +} + +# Clean start +cleanup &> /dev/null + +[[ $1 == "clean" ]] && exit 0 + +trap cleanup EXIT + +# Setup "Hypervisors" simulated with netns +ip link add veth-hv-1 type veth peer name veth-hv-2 +setup-hv-networking() { + hv=$1 + + ip netns add hv-$hv + ip link set veth-hv-$hv netns hv-$hv + ip -netns hv-$hv link set veth-hv-$hv name veth0 + + ip -netns hv-$hv link add vrf-underlay type vrf table 1 + ip -netns hv-$hv link set vrf-underlay up + ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 + ip -netns hv-$hv link set veth0 up + + ip -netns hv-$hv link add br0 type bridge + ip -netns hv-$hv link set br0 up + + ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 + ip -netns hv-$hv link set vxlan0 master br0 + ip -netns hv-$hv link set vxlan0 up +} +setup-hv-networking 1 +setup-hv-networking 2 + +# Check connectivity between HVs by pinging hv-2 from hv-1 +echo -n "Checking HV connectivity " +ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Setups a "VM" simulated by a netns an a veth pair +setup-vm() { + id=$1 + + ip netns add vm-$id + ip link add veth-tap type veth peer name veth-hv + + ip link set veth-tap netns hv-$id + ip -netns hv-$id link set veth-tap master br0 + ip -netns hv-$id link set veth-tap up + + ip link set veth-hv netns vm-$id + ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv + ip -netns vm-$id link set veth-hv up +} +setup-vm 1 +setup-vm 2 + +# Setup VTEP routes to make ARP work +bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent + +echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Move the underlay to a non-default VRF +ip -netns hv-1 link set veth0 vrf vrf-underlay +ip -netns hv-1 link set veth0 down +ip -netns hv-1 link set veth0 up +ip -netns hv-2 link set veth0 vrf vrf-underlay +ip -netns hv-2 link set veth0 down +ip -netns hv-2 link set veth0 up + +echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c new file mode 100644 index 000000000000..fd8b1c663c39 --- /dev/null +++ b/tools/testing/selftests/net/txring_overwrite.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Verify that consecutive sends over packet tx_ring are mirrored + * with their original content intact. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +const int eth_off = TPACKET_HDRLEN - sizeof(struct sockaddr_ll); +const int cfg_frame_size = 1000; + +static void build_packet(void *buffer, size_t blen, char payload_char) +{ + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + size_t off = 0; + + memset(buffer, 0, blen); + + eth = buffer; + eth->h_proto = htons(ETH_P_IP); + + off += sizeof(*eth); + iph = buffer + off; + iph->ttl = 8; + iph->ihl = 5; + iph->version = 4; + iph->saddr = htonl(INADDR_LOOPBACK); + iph->daddr = htonl(INADDR_LOOPBACK + 1); + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(blen - off); + iph->check = 0; + + off += sizeof(*iph); + udph = buffer + off; + udph->dest = htons(8000); + udph->source = htons(8001); + udph->len = htons(blen - off); + udph->check = 0; + + off += sizeof(*udph); + memset(buffer + off, payload_char, blen - off); +} + +static int setup_rx(void) +{ + int fdr; + + fdr = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); + if (fdr == -1) + error(1, errno, "socket r"); + + return fdr; +} + +static int setup_tx(char **ring) +{ + struct sockaddr_ll laddr = {}; + struct tpacket_req req = {}; + int fdt; + + fdt = socket(PF_PACKET, SOCK_RAW, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + laddr.sll_family = AF_PACKET; + laddr.sll_protocol = htons(0); + laddr.sll_ifindex = if_nametoindex("lo"); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + if (bind(fdt, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind fdt"); + + req.tp_block_size = getpagesize(); + req.tp_block_nr = 1; + req.tp_frame_size = getpagesize(); + req.tp_frame_nr = 1; + + if (setsockopt(fdt, SOL_PACKET, PACKET_TX_RING, + (void *)&req, sizeof(req))) + error(1, errno, "setsockopt ring"); + + *ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0); + if (!*ring) + error(1, errno, "mmap"); + + return fdt; +} + +static void send_pkt(int fdt, void *slot, char payload_char) +{ + struct tpacket_hdr *header = slot; + int ret; + + while (header->tp_status != TP_STATUS_AVAILABLE) + usleep(1000); + + build_packet(slot + eth_off, cfg_frame_size, payload_char); + + header->tp_len = cfg_frame_size; + header->tp_status = TP_STATUS_SEND_REQUEST; + + ret = sendto(fdt, NULL, 0, 0, NULL, 0); + if (ret == -1) + error(1, errno, "kick tx"); +} + +static int read_verify_pkt(int fdr, char payload_char) +{ + char buf[100]; + int ret; + + ret = read(fdr, buf, sizeof(buf)); + if (ret != sizeof(buf)) + error(1, errno, "read"); + + if (buf[60] != payload_char) { + printf("wrong pattern: 0x%x != 0x%x\n", buf[60], payload_char); + return 1; + } + + printf("read: %c (0x%x)\n", buf[60], buf[60]); + return 0; +} + +int main(int argc, char **argv) +{ + const char payload_patterns[] = "ab"; + char *ring; + int fdr, fdt, ret = 0; + + fdr = setup_rx(); + fdt = setup_tx(&ring); + + send_pkt(fdt, ring, payload_patterns[0]); + send_pkt(fdt, ring, payload_patterns[1]); + + ret |= read_verify_pkt(fdr, payload_patterns[0]); + ret |= read_verify_pkt(fdr, payload_patterns[1]); + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); + + return ret; +} diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index e94ef8067173..aeac53a99aeb 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -91,6 +91,28 @@ run_one_nat() { wait $(jobs -p) } +run_one_2sock() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} -p 12345 + sleep 0.1 + # first UDP GSO socket should be closed at this point + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + run_nat_test() { local -r args=$@ @@ -98,6 +120,13 @@ run_nat_test() { ./in_netns.sh $0 __subprocess_nat $2 rx -r $3 } +run_2sock_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_2sock $2 rx -G -r $3 +} + run_all() { local -r core_args="-l 4" local -r ipv4_args="${core_args} -4 -D 192.168.1.1" @@ -120,6 +149,7 @@ run_all() { run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" echo "ipv6" run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" @@ -130,6 +160,7 @@ run_all() { run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" } if [ ! -f ../bpf/xdp_dummy.o ]; then @@ -145,4 +176,7 @@ elif [[ $1 == "__subprocess" ]]; then elif [[ $1 == "__subprocess_nat" ]]; then shift run_one_nat $@ +elif [[ $1 == "__subprocess_2sock" ]]; then + shift + run_one_2sock $@ fi diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 0f0628613f81..5670a9ffd8eb 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -35,6 +35,9 @@ run_udp() { echo "udp gso" run_in_netns ${args} -S 0 + + echo "udp gso zerocopy" + run_in_netns ${args} -S 0 -z } run_tcp() { diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh new file mode 100755 index 000000000000..8db35b99457c --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -0,0 +1,302 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check xfrm policy resolution. Topology: +# +# 1.2 1.1 3.1 3.10 2.1 2.2 +# eth1 eth1 veth0 veth0 eth1 eth1 +# ns1 ---- ns3 ----- ns4 ---- ns2 +# +# ns3 and ns4 are connected via ipsec tunnel. +# pings from ns1 to ns2 (and vice versa) are supposed to work like this: +# ns1: ping 10.0.2.2: passes via ipsec tunnel. +# ns2: ping 10.0.1.2: passes via ipsec tunnel. + +# ns1: ping 10.0.1.253: passes via ipsec tunnel (direct policy) +# ns2: ping 10.0.2.253: passes via ipsec tunnel (direct policy) +# +# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) +# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 +policy_checks_ok=1 + +KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd +KEY_AES=0x0123456789abcdef0123456789012345 +SPI1=0x1 +SPI2=0x2 + +do_esp() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + local spi_out=$6 + local spi_in=$7 + + ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet + ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow + # to fwd decrypted packets after esp processing: + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow +} + +do_esp_policy_get_check() { + local ns=$1 + local lnet=$2 + local rnet=$3 + + ip -net $ns xfrm policy get src $lnet dst $rnet dir out > /dev/null + if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then + policy_checks_ok=0 + echo "FAIL: ip -net $ns xfrm policy get src $lnet dst $rnet dir out" + ret=1 + fi + + ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd > /dev/null + if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then + policy_checks_ok=0 + echo "FAIL: ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd" + ret=1 + fi +} + +do_exception() { + local ns=$1 + local me=$2 + local remote=$3 + local encryptip=$4 + local plain=$5 + + # network $plain passes without tunnel + ip -net $ns xfrm policy add dst $plain dir out priority 10 action allow + + # direct policy for $encryptip, use tunnel, higher prio takes precedence + ip -net $ns xfrm policy add dst $encryptip dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow +} + +# policies that are not supposed to match any packets generated in this test. +do_dummies4() { + local ns=$1 + + for i in $(seq 10 16);do + # dummy policy with wildcard src/dst. + echo netns exec $ns ip xfrm policy add src 0.0.0.0/0 dst 10.$i.99.0/30 dir out action block + echo netns exec $ns ip xfrm policy add src 10.$i.99.0/30 dst 0.0.0.0/0 dir out action block + for j in $(seq 32 64);do + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/30 dst 10.$i.$j.0/30 dir out action block + # silly, as it encompasses the one above too, but its allowed: + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/29 dst 10.$i.$j.0/29 dir out action block + # and yet again, even more broad one. + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/24 dst 10.$i.$j.0/24 dir out action block + echo netns exec $ns ip xfrm policy add src 10.$i.$j.0/24 dst 10.$i.1.0/24 dir fwd action block + done + done | ip -batch /dev/stdin +} + +do_dummies6() { + local ns=$1 + + for i in $(seq 10 16);do + for j in $(seq 32 64);do + echo netns exec $ns ip xfrm policy add src dead:$i::/64 dst dead:$i:$j::/64 dir out action block + echo netns exec $ns ip xfrm policy add src dead:$i:$j::/64 dst dead:$i::/24 dir fwd action block + done + done | ip -batch /dev/stdin +} + +check_ipt_policy_count() +{ + ns=$1 + + ip netns exec $ns iptables-save -c |grep policy | ( read c rest + ip netns exec $ns iptables -Z + if [ x"$c" = x'[0:0]' ]; then + exit 0 + elif [ x"$c" = x ]; then + echo "ERROR: No counters" + ret=1 + exit 111 + else + exit 1 + fi + ) +} + +check_xfrm() { + # 0: iptables -m policy rule count == 0 + # 1: iptables -m policy rule count != 0 + rval=$1 + ip=$2 + lret=0 + + ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + + check_ipt_policy_count ns3 + if [ $? -ne $rval ] ; then + lret=1 + fi + check_ipt_policy_count ns4 + if [ $? -ne $rval ] ; then + lret=1 + fi + + ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + + check_ipt_policy_count ns3 + if [ $? -ne $rval ] ; then + lret=1 + fi + check_ipt_policy_count ns4 + if [ $? -ne $rval ] ; then + lret=1 + fi + + return $lret +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +ip -Version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without the ip tool" + exit $ksft_skip +fi + +# needed to check if policy lookup got valid ipsec result +iptables --version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without iptables tool" + exit $ksft_skip +fi + +for i in 1 2 3 4; do + ip netns add ns$i + ip -net ns$i link set lo up +done + +DEV=veth0 +ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 +ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 + +ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 + +DEV=veth0 +for i in 1 2; do + ip -net ns$i link set $DEV up + ip -net ns$i addr add 10.0.$i.2/24 dev $DEV + ip -net ns$i addr add dead:$i::2/64 dev $DEV + + ip -net ns$i addr add 10.0.$i.253 dev $DEV + ip -net ns$i addr add 10.0.$i.254 dev $DEV + ip -net ns$i addr add dead:$i::fd dev $DEV + ip -net ns$i addr add dead:$i::fe dev $DEV +done + +for i in 3 4; do +ip -net ns$i link set eth1 up +ip -net ns$i link set veth0 up +done + +ip -net ns1 route add default via 10.0.1.1 +ip -net ns2 route add default via 10.0.2.1 + +ip -net ns3 addr add 10.0.1.1/24 dev eth1 +ip -net ns3 addr add 10.0.3.1/24 dev veth0 +ip -net ns3 addr add 2001:1::1/64 dev eth1 +ip -net ns3 addr add 2001:3::1/64 dev veth0 + +ip -net ns3 route add default via 10.0.3.10 + +ip -net ns4 addr add 10.0.2.1/24 dev eth1 +ip -net ns4 addr add 10.0.3.10/24 dev veth0 +ip -net ns4 addr add 2001:2::1/64 dev eth1 +ip -net ns4 addr add 2001:3::10/64 dev veth0 +ip -net ns4 route add default via 10.0.3.1 + +for j in 4 6; do + for i in 3 4;do + ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + done +done + +# abuse iptables rule counter to check if ping matches a policy +ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +if [ $? -ne 0 ];then + echo "SKIP: Could not insert iptables rule" + for i in 1 2 3 4;do ip netns del ns$i;done + exit $ksft_skip +fi + +# localip remoteip localnet remotenet +do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 + +do_dummies4 ns3 +do_dummies6 ns4 + +do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 +do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 + +# ping to .254 should use ipsec, exception is not installed. +check_xfrm 1 254 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to use ipsec tunnel" + ret=1 +else + echo "PASS: policy before exception matches" +fi + +# installs exceptions +# localip remoteip encryptdst plaindst +do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 + +do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 + +# ping to .254 should now be excluded from the tunnel +check_xfrm 0 254 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to fail" + ret=1 +else + echo "PASS: ping to .254 bypassed ipsec tunnel" +fi + +# ping to .253 should use use ipsec due to direct policy exception. +check_xfrm 1 253 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .253 to use ipsec tunnel" + ret=1 +else + echo "PASS: direct policy matches" +fi + +# ping to .2 should use ipsec. +check_xfrm 1 2 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .2 to use ipsec tunnel" + ret=1 +else + echo "PASS: policy matches" +fi + +for i in 1 2 3 4;do ip netns del ns$i;done + +exit $ret diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile new file mode 100644 index 000000000000..47ed6cef93fb --- /dev/null +++ b/tools/testing/selftests/netfilter/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for netfilter selftests + +TEST_PROGS := nft_trans_stress.sh + +include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config new file mode 100644 index 000000000000..1017313e41a8 --- /dev/null +++ b/tools/testing/selftests/netfilter/config @@ -0,0 +1,2 @@ +CONFIG_NET_NS=y +NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh new file mode 100755 index 000000000000..f1affd12c4b1 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# This test is for stress-testing the nf_tables config plane path vs. +# packet path processing: Make sure we never release rules that are +# still visible to other cpus. +# +# set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +testns=testns1 +tables="foo bar baz quux" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +tmp=$(mktemp) + +for table in $tables; do + echo add table inet "$table" >> "$tmp" + echo flush table inet "$table" >> "$tmp" + + echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp" + echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp" + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + echo "add chain inet $table $chain" >> "$tmp" + done + + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + for BASE in INPUT OUTPUT; do + echo "add rule inet $table $BASE counter jump $chain" >> "$tmp" + done + echo "add rule inet $table $chain counter return" >> "$tmp" + done +done + +ip netns add "$testns" +ip -netns "$testns" link set lo up + +lscpu | grep ^CPU\(s\): | ( read cpu cpunum ; +cpunum=$((cpunum-1)) +for i in $(seq 0 $cpunum);do + mask=$(printf 0x%x $((1<<$i))) + ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null & + ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null & +done) + +sleep 1 + +for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done + +for table in $tables;do + randsleep=$((RANDOM%10)) + sleep $randsleep + ip netns exec "$testns" nft delete table inet $table 2>/dev/null +done + +randsleep=$((RANDOM%10)) +sleep $randsleep + +pkill -9 ping + +wait + +rm -f "$tmp" +ip netns del "$testns" diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index 14cfcf006936..6c17a93c4e03 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -I../../../../../usr/include -TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_PROGS := txtimestamp.sh all: $(TEST_PROGS) @@ -9,4 +10,4 @@ top_srcdir = ../../../../.. include ../../lib.mk clean: - rm -fr $(TEST_PROGS) + rm -fr $(TEST_GEN_FILES) diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config new file mode 100644 index 000000000000..a13e3169b0a4 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/config @@ -0,0 +1,2 @@ +CONFIG_IFB=y +CONFIG_NET_SCH_NETEM=y diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 81a98a240456..2e563d17cf0c 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -39,6 +39,7 @@ #include <inttypes.h> #include <linux/errqueue.h> #include <linux/if_ether.h> +#include <linux/ipv6.h> #include <linux/net_tstamp.h> #include <netdb.h> #include <net/if.h> @@ -69,15 +70,67 @@ static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; static int cfg_poll_timeout = 100; +static int cfg_delay_snd; +static int cfg_delay_ack; static bool cfg_show_payload; static bool cfg_do_pktinfo; static bool cfg_loop_nodata; static bool cfg_no_delay; +static bool cfg_use_cmsg; +static bool cfg_use_pf_packet; +static bool cfg_do_listen; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; -static struct timespec ts_prev; +static struct timespec ts_usr; + +static int saved_tskey = -1; +static int saved_tskey_type = -1; + +static bool test_failed; + +static int64_t timespec_to_us64(struct timespec *ts) +{ + return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000; +} + +static void validate_key(int tskey, int tstype) +{ + int stepsize; + + /* compare key for each subsequent request + * must only test for one type, the first one requested + */ + if (saved_tskey == -1) + saved_tskey_type = tstype; + else if (saved_tskey_type != tstype) + return; + + stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + if (tskey != saved_tskey + stepsize) { + fprintf(stderr, "ERROR: key %d, expected %d\n", + tskey, saved_tskey + stepsize); + test_failed = true; + } + + saved_tskey = tskey; +} + +static void validate_timestamp(struct timespec *cur, int min_delay) +{ + int max_delay = min_delay + 500 /* processing time upper bound */; + int64_t cur64, start64; + + cur64 = timespec_to_us64(cur); + start64 = timespec_to_us64(&ts_usr); + + if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { + fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n", + cur64 - start64, min_delay, max_delay); + test_failed = true; + } +} static void __print_timestamp(const char *name, struct timespec *cur, uint32_t key, int payload_len) @@ -89,32 +142,19 @@ static void __print_timestamp(const char *name, struct timespec *cur, name, cur->tv_sec, cur->tv_nsec / 1000, key, payload_len); - if ((ts_prev.tv_sec | ts_prev.tv_nsec)) { - int64_t cur_ms, prev_ms; - - cur_ms = (long) cur->tv_sec * 1000 * 1000; - cur_ms += cur->tv_nsec / 1000; - - prev_ms = (long) ts_prev.tv_sec * 1000 * 1000; - prev_ms += ts_prev.tv_nsec / 1000; - - fprintf(stderr, " (%+" PRId64 " us)", cur_ms - prev_ms); - } + if (cur != &ts_usr) + fprintf(stderr, " (USR %+" PRId64 " us)", + timespec_to_us64(cur) - timespec_to_us64(&ts_usr)); - ts_prev = *cur; fprintf(stderr, "\n"); } static void print_timestamp_usr(void) { - struct timespec ts; - struct timeval tv; /* avoid dependency on -lrt */ - - gettimeofday(&tv, NULL); - ts.tv_sec = tv.tv_sec; - ts.tv_nsec = tv.tv_usec * 1000; + if (clock_gettime(CLOCK_REALTIME, &ts_usr)) + error(1, errno, "clock_gettime"); - __print_timestamp(" USR", &ts, 0, 0); + __print_timestamp(" USR", &ts_usr, 0, 0); } static void print_timestamp(struct scm_timestamping *tss, int tstype, @@ -122,15 +162,20 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, { const char *tsname; + validate_key(tskey, tstype); + switch (tstype) { case SCM_TSTAMP_SCHED: tsname = " ENQ"; + validate_timestamp(&tss->ts[0], 0); break; case SCM_TSTAMP_SND: tsname = " SND"; + validate_timestamp(&tss->ts[0], cfg_delay_snd); break; case SCM_TSTAMP_ACK: tsname = " ACK"; + validate_timestamp(&tss->ts[0], cfg_delay_ack); break; default: error(1, 0, "unknown timestamp type: %u", @@ -194,7 +239,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } else if ((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) || (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { + cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level = SOL_PACKET && + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { @@ -269,32 +316,124 @@ static int recv_errmsg(int fd) return ret == -1; } -static void do_test(int family, unsigned int opt) +static uint16_t get_ip_csum(const uint16_t *start, int num_words, + unsigned long sum) +{ + int i; + + for (i = 0; i < num_words; i++) + sum += start[i]; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t get_udp_csum(const struct udphdr *udph, int alen) +{ + unsigned long pseudo_sum, csum_len; + const void *csum_start = udph; + + pseudo_sum = htons(IPPROTO_UDP); + pseudo_sum += udph->len; + + /* checksum ip(v6) addresses + udp header + payload */ + csum_start -= alen * 2; + csum_len = ntohs(udph->len) + alen * 2; + + return get_ip_csum(csum_start, csum_len >> 1, pseudo_sum); +} + +static int fill_header_ipv4(void *p) +{ + struct iphdr *iph = p; + + memset(iph, 0, sizeof(*iph)); + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 2; + iph->saddr = daddr.sin_addr.s_addr; /* set for udp csum calc */ + iph->daddr = daddr.sin_addr.s_addr; + iph->protocol = IPPROTO_UDP; + + /* kernel writes saddr, csum, len */ + + return sizeof(*iph); +} + +static int fill_header_ipv6(void *p) +{ + struct ipv6hdr *ip6h = p; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct udphdr) + cfg_payload_len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 64; + + ip6h->saddr = daddr6.sin6_addr; + ip6h->daddr = daddr6.sin6_addr; + + /* kernel does not write saddr in case of ipv6 */ + + return sizeof(*ip6h); +} + +static void fill_header_udp(void *p, bool is_ipv4) { + struct udphdr *udph = p; + + udph->source = ntohs(dest_port + 1); /* spoof */ + udph->dest = ntohs(dest_port); + udph->len = ntohs(sizeof(*udph) + cfg_payload_len); + udph->check = 0; + + udph->check = get_udp_csum(udph, is_ipv4 ? sizeof(struct in_addr) : + sizeof(struct in6_addr)); +} + +static void do_test(int family, unsigned int report_opt) +{ + char control[CMSG_SPACE(sizeof(uint32_t))]; + struct sockaddr_ll laddr; + unsigned int sock_opt; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; char *buf; int fd, i, val = 1, total_len; - if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { - /* due to lack of checksum generation code */ - fprintf(stderr, "test: skipping datagram over IPv6\n"); - return; - } - total_len = cfg_payload_len; - if (cfg_proto == SOCK_RAW) { + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_ipproto == IPPROTO_RAW) - total_len += sizeof(struct iphdr); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (family == PF_INET) + total_len += sizeof(struct iphdr); + else + total_len += sizeof(struct ipv6hdr); + + /* special case, only rawv6_sendmsg: + * pass proto in sin6_port if not connected + * also see ANK comment in net/ipv4/raw.c + */ + daddr6.sin6_port = htons(cfg_ipproto); } buf = malloc(total_len); if (!buf) error(1, 0, "malloc"); - fd = socket(family, cfg_proto, cfg_ipproto); + fd = socket(cfg_use_pf_packet ? PF_PACKET : family, + cfg_proto, cfg_ipproto); if (fd < 0) error(1, errno, "socket"); + /* reset expected key on each new socket */ + saved_tskey = -1; + if (cfg_proto == SOCK_STREAM) { if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char*) &val, sizeof(val))) @@ -321,54 +460,80 @@ static void do_test(int family, unsigned int opt) } } - opt |= SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_OPT_CMSG | - SOF_TIMESTAMPING_OPT_ID; + sock_opt = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_OPT_ID; + + if (!cfg_use_cmsg) + sock_opt |= report_opt; + if (cfg_loop_nodata) - opt |= SOF_TIMESTAMPING_OPT_TSONLY; + sock_opt |= SOF_TIMESTAMPING_OPT_TSONLY; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, - (char *) &opt, sizeof(opt))) + (char *) &sock_opt, sizeof(sock_opt))) error(1, 0, "setsockopt timestamping"); for (i = 0; i < cfg_num_pkts; i++) { - memset(&ts_prev, 0, sizeof(ts_prev)); + memset(&msg, 0, sizeof(msg)); memset(buf, 'a' + i, total_len); - if (cfg_proto == SOCK_RAW) { - struct udphdr *udph; + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { int off = 0; - if (cfg_ipproto == IPPROTO_RAW) { - struct iphdr *iph = (void *) buf; - - memset(iph, 0, sizeof(*iph)); - iph->ihl = 5; - iph->version = 4; - iph->ttl = 2; - iph->daddr = daddr.sin_addr.s_addr; - iph->protocol = IPPROTO_UDP; - /* kernel writes saddr, csum, len */ - - off = sizeof(*iph); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { + if (family == PF_INET) + off = fill_header_ipv4(buf); + else + off = fill_header_ipv6(buf); } - udph = (void *) buf + off; - udph->source = ntohs(9000); /* random spoof */ - udph->dest = ntohs(dest_port); - udph->len = ntohs(sizeof(*udph) + cfg_payload_len); - udph->check = 0; /* not allowed for IPv6 */ + fill_header_udp(buf + off, family == PF_INET); } print_timestamp_usr(); + + iov.iov_base = buf; + iov.iov_len = total_len; + if (cfg_proto != SOCK_STREAM) { - if (family == PF_INET) - val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); - else - val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); - } else { - val = send(fd, buf, cfg_payload_len, 0); + if (cfg_use_pf_packet) { + memset(&laddr, 0, sizeof(laddr)); + + laddr.sll_family = AF_PACKET; + laddr.sll_ifindex = 1; + laddr.sll_protocol = htons(family == AF_INET ? ETH_P_IP : ETH_P_IPV6); + laddr.sll_halen = ETH_ALEN; + + msg.msg_name = (void *)&laddr; + msg.msg_namelen = sizeof(laddr); + } else if (family == PF_INET) { + msg.msg_name = (void *)&daddr; + msg.msg_namelen = sizeof(daddr); + } else { + msg.msg_name = (void *)&daddr6; + msg.msg_namelen = sizeof(daddr6); + } + } + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (cfg_use_cmsg) { + memset(control, 0, sizeof(control)); + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } + + val = sendmsg(fd, &msg, 0); if (val != total_len) error(1, errno, "send"); @@ -385,7 +550,7 @@ static void do_test(int family, unsigned int opt) error(1, errno, "close"); free(buf); - usleep(400 * 1000); + usleep(100 * 1000); } static void __attribute__((noreturn)) usage(const char *filepath) @@ -396,15 +561,20 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -6: only IPv6\n" " -h: show this message\n" " -c N: number of packets for each test\n" + " -C: use cmsg to set tstamp recording options\n" " -D: no delay between packets\n" " -F: poll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" + " -L listen on hostname and port\n" " -n: set no-payload option\n" + " -p N: connect to port N\n" + " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" - " -p N: connect to port N\n" " -u: use udp\n" + " -v: validate SND delay (usec)\n" + " -V: validate ACK delay (usec)\n" " -x: show payload (up to 70 bytes)\n", filepath); exit(1); @@ -413,9 +583,9 @@ static void __attribute__((noreturn)) usage(const char *filepath) static void parse_opt(int argc, char **argv) { int proto_count = 0; - char c; + int c; - while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) { + while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -426,6 +596,9 @@ static void parse_opt(int argc, char **argv) case 'c': cfg_num_pkts = strtoul(optarg, NULL, 10); break; + case 'C': + cfg_use_cmsg = true; + break; case 'D': cfg_no_delay = true; break; @@ -435,9 +608,24 @@ static void parse_opt(int argc, char **argv) case 'I': cfg_do_pktinfo = true; break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 10); + break; + case 'L': + cfg_do_listen = true; + break; case 'n': cfg_loop_nodata = true; break; + case 'p': + dest_port = strtoul(optarg, NULL, 10); + break; + case 'P': + proto_count++; + cfg_use_pf_packet = true; + cfg_proto = SOCK_DGRAM; + cfg_ipproto = 0; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; @@ -453,11 +641,11 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_DGRAM; cfg_ipproto = IPPROTO_UDP; break; - case 'l': - cfg_payload_len = strtoul(optarg, NULL, 10); + case 'v': + cfg_delay_snd = strtoul(optarg, NULL, 10); break; - case 'p': - dest_port = strtoul(optarg, NULL, 10); + case 'V': + cfg_delay_ack = strtoul(optarg, NULL, 10); break; case 'x': cfg_show_payload = true; @@ -475,7 +663,9 @@ static void parse_opt(int argc, char **argv) if (!do_ipv4 && !do_ipv6) error(1, 0, "pass -4 or -6, not both"); if (proto_count > 1) - error(1, 0, "pass -r, -R or -u, not multiple"); + error(1, 0, "pass -P, -r, -R or -u, not multiple"); + if (cfg_do_pktinfo && cfg_use_pf_packet) + error(1, 0, "cannot ask for pktinfo over pf_packet"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); @@ -483,10 +673,12 @@ static void parse_opt(int argc, char **argv) static void resolve_hostname(const char *hostname) { + struct addrinfo hints = { .ai_family = do_ipv4 ? AF_INET : AF_INET6 }; struct addrinfo *addrs, *cur; int have_ipv4 = 0, have_ipv6 = 0; - if (getaddrinfo(hostname, NULL, NULL, &addrs)) +retry: + if (getaddrinfo(hostname, NULL, &hints, &addrs)) error(1, errno, "getaddrinfo"); cur = addrs; @@ -506,14 +698,41 @@ static void resolve_hostname(const char *hostname) if (addrs) freeaddrinfo(addrs); + if (do_ipv6 && hints.ai_family != AF_INET6) { + hints.ai_family = AF_INET6; + goto retry; + } + do_ipv4 &= have_ipv4; do_ipv6 &= have_ipv6; } +static void do_listen(int family, void *addr, int alen) +{ + int fd, type; + + type = cfg_proto == SOCK_RAW ? SOCK_DGRAM : cfg_proto; + + fd = socket(family, type, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (bind(fd, addr, alen)) + error(1, errno, "bind rx"); + + if (type == SOCK_STREAM && listen(fd, 10)) + error(1, errno, "listen rx"); + + /* leave fd open, will be closed on process exit. + * this enables connect() to succeed and avoids icmp replies + */ +} + static void do_main(int family) { - fprintf(stderr, "family: %s\n", - family == PF_INET ? "INET" : "INET6"); + fprintf(stderr, "family: %s %s\n", + family == PF_INET ? "INET" : "INET6", + cfg_use_pf_packet ? "(PF_PACKET)" : ""); fprintf(stderr, "test SND\n"); do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); @@ -555,10 +774,17 @@ int main(int argc, char **argv) fprintf(stderr, "server port: %u\n", dest_port); fprintf(stderr, "\n"); - if (do_ipv4) + if (do_ipv4) { + if (cfg_do_listen) + do_listen(PF_INET, &daddr, sizeof(daddr)); do_main(PF_INET); - if (do_ipv6) + } + + if (do_ipv6) { + if (cfg_do_listen) + do_listen(PF_INET6, &daddr6, sizeof(daddr6)); do_main(PF_INET6); + } - return 0; + return test_failed; } diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/networking/timestamping/txtimestamp.sh new file mode 100755 index 000000000000..df0d86ca72b7 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Send packets with transmit timestamps over loopback with netem +# Verify that timestamps correspond to netem delay + +set -e + +setup() { + # set 1ms delay on lo egress + tc qdisc add dev lo root netem delay 1ms + + # set 2ms delay on ifb0 egress + modprobe ifb + ip link add ifb_netem0 type ifb + ip link set dev ifb_netem0 up + tc qdisc add dev ifb_netem0 root netem delay 2ms + + # redirect lo ingress through ifb0 egress + tc qdisc add dev lo handle ffff: ingress + tc filter add dev lo parent ffff: \ + u32 match mark 0 0xffff \ + action mirred egress redirect dev ifb_netem0 +} + +run_test_v4v6() { + # SND will be delayed 1000us + # ACK will be delayed 6000us: 1 + 2 ms round-trip + local -r args="$@ -v 1000 -V 6000" + + ./txtimestamp ${args} -4 -L 127.0.0.1 + ./txtimestamp ${args} -6 -L ::1 +} + +run_test_tcpudpraw() { + local -r args=$@ + + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet +} + +run_test_all() { + run_test_tcpudpraw # setsockopt + run_test_tcpudpraw -C # cmsg + run_test_tcpudpraw -n # timestamp w/o data +} + +if [[ "$(ip netns identify)" == "root" ]]; then + ../../net/in_netns.sh $0 $@ +else + setup + run_test_all + echo "OK. All tests passed" +fi diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c index 1b0e9e9a2ddc..f2fa101c5a6a 100644 --- a/tools/testing/selftests/powerpc/mm/wild_bctr.c +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -47,8 +47,9 @@ static int ok(void) return 0; } -#define REG_POISON 0x5a5aUL -#define POISONED_REG(n) ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n)) +#define REG_POISON 0x5a5a +#define POISONED_REG(n) ((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \ + (((unsigned long)REG_POISON) << 16) | (n)) static inline void poison_regs(void) { @@ -105,6 +106,20 @@ static void dump_regs(void) } } +#ifdef _CALL_AIXDESC +struct opd { + unsigned long ip; + unsigned long toc; + unsigned long env; +}; +static struct opd bad_opd = { + .ip = BAD_NIP, +}; +#define BAD_FUNC (&bad_opd) +#else +#define BAD_FUNC BAD_NIP +#endif + int test_wild_bctr(void) { int (*func_ptr)(void); @@ -133,7 +148,7 @@ int test_wild_bctr(void) poison_regs(); - func_ptr = (int (*)(void))BAD_NIP; + func_ptr = (int (*)(void))BAD_FUNC; func_ptr(); FAIL_IF(1); /* we didn't segv? */ diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 6f1f4a6e1ecb..85744425b08d 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -13,7 +13,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* Test readlink /proc/self/map_files/... with address 0. */ +/* Test readlink /proc/self/map_files/... with minimum address. */ #include <errno.h> #include <sys/types.h> #include <sys/stat.h> @@ -47,6 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); +#ifdef __arm__ + unsigned long va = 2 * PAGE_SIZE; +#else + unsigned long va = 0; +#endif void *p; int fd; unsigned long a, b; @@ -55,7 +60,7 @@ int main(void) if (fd == -1) return 1; - p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); if (p == MAP_FAILED) { if (errno == EPERM) return 2; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e1473234968d..c9a2abf8be1b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2731,9 +2731,14 @@ TEST(syscall_restart) ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); ASSERT_EQ(true, WIFSTOPPED(status)); ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); - /* Verify signal delivery came from parent now. */ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); - EXPECT_EQ(getpid(), info.si_pid); + /* + * There is no siginfo on SIGSTOP any more, so we can't verify + * signal delivery came from parent now (getpid() == info.si_pid). + * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com + * At least verify the SIGSTOP via PTRACE_GETSIGINFO. + */ + EXPECT_EQ(SIGSTOP, info.si_signo); /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ ASSERT_EQ(0, kill(child_pid, SIGCONT)); diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index 7a60b85e148f..c5cc160948b3 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,2 +1,5 @@ __pycache__/ *.pyc +plugins/ +*.xml +*.tap diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py index 3ee9a6dacb52..1d9e279331eb 100644 --- a/tools/testing/selftests/tc-testing/TdcPlugin.py +++ b/tools/testing/selftests/tc-testing/TdcPlugin.py @@ -18,11 +18,12 @@ class TdcPlugin: if self.args.verbose > 1: print(' -- {}.post_suite'.format(self.sub_class)) - def pre_case(self, test_ordinal, testid): + def pre_case(self, test_ordinal, testid, test_name): '''run commands before test_runner does one test''' if self.args.verbose > 1: print(' -- {}.pre_case'.format(self.sub_class)) self.args.testid = testid + self.args.test_name = test_name self.args.test_ordinal = test_ordinal def post_case(self): diff --git a/tools/testing/selftests/tc-testing/TdcResults.py b/tools/testing/selftests/tc-testing/TdcResults.py new file mode 100644 index 000000000000..1e4d95fdf8d0 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TdcResults.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +from enum import Enum + +class ResultState(Enum): + noresult = -1 + skip = 0 + success = 1 + fail = 2 + +class TestResult: + def __init__(self, test_id="", test_name=""): + self.test_id = test_id + self.test_name = test_name + self.result = ResultState.noresult + self.failmsg = "" + self.errormsg = "" + self.steps = [] + + def set_result(self, result): + if (isinstance(result, ResultState)): + self.result = result + return True + else: + raise TypeError('Unknown result type, must be type ResultState') + + def get_result(self): + return self.result + + def set_errormsg(self, errormsg): + self.errormsg = errormsg + return True + + def append_errormsg(self, errormsg): + self.errormsg = '{}\n{}'.format(self.errormsg, errormsg) + + def get_errormsg(self): + return self.errormsg + + def set_failmsg(self, failmsg): + self.failmsg = failmsg + return True + + def append_failmsg(self, failmsg): + self.failmsg = '{}\n{}'.format(self.failmsg, failmsg) + + def get_failmsg(self): + return self.failmsg + + def add_steps(self, newstep): + if type(newstep) == list: + self.steps.extend(newstep) + elif type(newstep) == str: + self.steps.append(step) + else: + raise TypeError('TdcResults.add_steps() requires a list or str') + + def get_executed_steps(self): + return self.steps + +class TestSuiteReport(): + _testsuite = [] + + def add_resultdata(self, result_data): + if isinstance(result_data, TestResult): + self._testsuite.append(result_data) + return True + + def count_tests(self): + return len(self._testsuite) + + def count_failures(self): + return sum(1 for t in self._testsuite if t.result == ResultState.fail) + + def count_skips(self): + return sum(1 for t in self._testsuite if t.result == ResultState.skip) + + def find_result(self, test_id): + return next((tr for tr in self._testsuite if tr.test_id == test_id), None) + + def update_result(self, result_data): + orig = self.find_result(result_data.test_id) + if orig != None: + idx = self._testsuite.index(orig) + self._testsuite[idx] = result_data + else: + self.add_resultdata(result_data) + + def format_tap(self): + ftap = "" + ftap += '1..{}\n'.format(self.count_tests()) + index = 1 + for t in self._testsuite: + if t.result == ResultState.fail: + ftap += 'not ' + ftap += 'ok {} {} - {}'.format(str(index), t.test_id, t.test_name) + if t.result == ResultState.skip or t.result == ResultState.noresult: + ftap += ' # skipped - {}\n'.format(t.errormsg) + elif t.result == ResultState.fail: + if len(t.steps) > 0: + ftap += '\tCommands executed in this test case:' + for step in t.steps: + ftap += '\n\t\t{}'.format(step) + ftap += '\n\t{}'.format(t.failmsg) + ftap += '\n' + index += 1 + return ftap + + def format_xunit(self): + from xml.sax.saxutils import escape + xunit = "<testsuites>\n" + xunit += '\t<testsuite tests=\"{}\" skips=\"{}\">\n'.format(self.count_tests(), self.count_skips()) + for t in self._testsuite: + xunit += '\t\t<testcase classname=\"{}\" '.format(escape(t.test_id)) + xunit += 'name=\"{}\">\n'.format(escape(t.test_name)) + if t.failmsg: + xunit += '\t\t\t<failure>\n' + if len(t.steps) > 0: + xunit += 'Commands executed in this test case:\n' + for step in t.steps: + xunit += '\t{}\n'.format(escape(step)) + xunit += 'FAILURE: {}\n'.format(escape(t.failmsg)) + xunit += '\t\t\t</failure>\n' + if t.errormsg: + xunit += '\t\t\t<error>\n{}\n'.format(escape(t.errormsg)) + xunit += '\t\t\t</error>\n' + if t.result == ResultState.skip: + xunit += '\t\t\t<skipped/>\n' + xunit += '\t\t</testcase>\n' + xunit += '\t</testsuite>\n' + xunit += '</testsuites>\n' + return xunit diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py index 477a7bd7d7fb..e00c798de0bb 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py @@ -11,6 +11,7 @@ from string import Template import subprocess import time from TdcPlugin import TdcPlugin +from TdcResults import * from tdc_config import * @@ -21,6 +22,7 @@ class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'valgrind/SubPlugin' self.tap = '' + self._tsr = TestSuiteReport() super().__init__() def pre_suite(self, testcount, testidlist): @@ -34,10 +36,14 @@ class SubPlugin(TdcPlugin): def post_suite(self, index): '''run commands after test_runner goes into a test loop''' super().post_suite(index) - self._add_to_tap('\n|---\n') if self.args.verbose > 1: print('{}.post_suite'.format(self.sub_class)) - print('{}'.format(self.tap)) + #print('{}'.format(self.tap)) + for xx in range(index - 1, self.testcount): + res = TestResult('{}-mem'.format(self.testidlist[xx]), 'Test skipped') + res.set_result(ResultState.skip) + res.set_errormsg('Skipped because of prior setup/teardown failure') + self._add_results(res) if self.args.verbose < 4: subprocess.check_output('rm -f vgnd-*.log', shell=True) @@ -128,8 +134,17 @@ class SubPlugin(TdcPlugin): nle_num = int(nle_mo.group(1)) mem_results = '' + res = TestResult('{}-mem'.format(self.args.testid), + '{} memory leak check'.format(self.args.test_name)) if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0): mem_results += 'not ' + res.set_result(ResultState.fail) + res.set_failmsg('Memory leak detected') + res.append_failmsg(content) + else: + res.set_result(ResultState.success) + + self._add_results(res) mem_results += 'ok {} - {}-mem # {}\n'.format( self.args.test_ordinal, self.args.testid, 'memory leak check') @@ -138,5 +153,8 @@ class SubPlugin(TdcPlugin): print('{}'.format(content)) self._add_to_tap(content) + def _add_results(self, res): + self._tsr.add_resultdata(res) + def _add_to_tap(self, more_tap_output): self.tap += more_tap_output diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 87a04a8a5945..e6e4ce80a726 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -23,6 +23,7 @@ from tdc_config import * from tdc_helper import * import TdcPlugin +from TdcResults import * class PluginMgrTestFail(Exception): @@ -60,10 +61,10 @@ class PluginMgr: for pgn_inst in reversed(self.plugin_instances): pgn_inst.post_suite(index) - def call_pre_case(self, test_ordinal, testid): + def call_pre_case(self, test_ordinal, testid, test_name): for pgn_inst in self.plugin_instances: try: - pgn_inst.pre_case(test_ordinal, testid) + pgn_inst.pre_case(test_ordinal, testid, test_name) except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) @@ -102,7 +103,6 @@ class PluginMgr: self.argparser = argparse.ArgumentParser( description='Linux TC unit tests') - def replace_keywords(cmd): """ For a given executable command, substitute any known @@ -131,12 +131,16 @@ def exec_cmd(args, pm, stage, command): stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ENVIR) - (rawout, serr) = proc.communicate() - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") + try: + (rawout, serr) = proc.communicate(timeout=NAMES['TIMEOUT']) + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8", errors="ignore") + else: + foutput = rawout.decode("utf-8", errors="ignore") + except subprocess.TimeoutExpired: + foutput = "Command \"{}\" timed out\n".format(command) + proc.returncode = 255 proc.stdout.close() proc.stderr.close() @@ -169,6 +173,8 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): file=sys.stderr) print("\n{} *** Error message: \"{}\"".format(prefix, foutput), file=sys.stderr) + print("returncode {}; expected {}".format(proc.returncode, + exit_codes)) print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) @@ -181,6 +187,7 @@ def run_one_test(pm, args, index, tidx): result = True tresult = "" tap = "" + res = TestResult(tidx['id'], tidx['name']) if args.verbose > 0: print("\t====================\n=====> ", end="") print("Test " + tidx["id"] + ": " + tidx["name"]) @@ -188,19 +195,26 @@ def run_one_test(pm, args, index, tidx): # populate NAMES with TESTID for this test NAMES['TESTID'] = tidx['id'] - pm.call_pre_case(index, tidx['id']) + pm.call_pre_case(index, tidx['id'], tidx['name']) prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"]) if (args.verbose > 0): print('-----> execute stage') pm.call_pre_execute() (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"]) - exit_code = p.returncode + if p: + exit_code = p.returncode + else: + exit_code = None + pm.call_post_execute() - if (exit_code != int(tidx["expExitCode"])): - result = False - print("exit:", exit_code, int(tidx["expExitCode"])) + if (exit_code is None or exit_code != int(tidx["expExitCode"])): + print("exit: {!r}".format(exit_code)) + print("exit: {}".format(int(tidx["expExitCode"]))) + #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"]))) + res.set_result(ResultState.fail) + res.set_failmsg('Command exited with {}, expected {}\n{}'.format(exit_code, tidx["expExitCode"], procout)) print(procout) else: if args.verbose > 0: @@ -211,20 +225,15 @@ def run_one_test(pm, args, index, tidx): if procout: match_index = re.findall(match_pattern, procout) if len(match_index) != int(tidx["matchCount"]): - result = False + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) elif int(tidx["matchCount"]) != 0: - result = False - - if not result: - tresult += 'not ' - tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name']) - tap += tresult - - if result == False: - if procout: - tap += procout + res.set_result(ResultState.fail) + res.set_failmsg('No output generated by verify command.') else: - tap += 'No output!\n' + res.set_result(ResultState.success) prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) pm.call_post_case() @@ -233,7 +242,7 @@ def run_one_test(pm, args, index, tidx): # remove TESTID from NAMES del(NAMES['TESTID']) - return tap + return res def test_runner(pm, args, filtered_tests): """ @@ -253,25 +262,15 @@ def test_runner(pm, args, filtered_tests): emergency_exit = False emergency_exit_message = '' - if args.notap: - if args.verbose: - tap = 'notap requested: omitting test plan\n' - else: - tap = str(index) + ".." + str(tcount) + "\n" + tsr = TestSuiteReport() + try: pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) except Exception as ee: ex_type, ex, ex_tb = sys.exc_info() print('Exception {} {} (caught in pre_suite).'. format(ex_type, ex)) - # when the extra print statements are uncommented, - # the traceback does not appear between them - # (it appears way earlier in the tdc.py output) - # so don't bother ... - # print('--------------------(') - # print('traceback') traceback.print_tb(ex_tb) - # print('--------------------)') emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex) emergency_exit = True stage = 'pre-SUITE' @@ -287,15 +286,26 @@ def test_runner(pm, args, filtered_tests): if args.verbose > 1: print('Not executing test {} {} because DEV2 not defined'. format(tidx['id'], tidx['name'])) + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg('Not executed because DEV2 is not defined') + tsr.add_resultdata(res) continue try: badtest = tidx # in case it goes bad - tap += run_one_test(pm, args, index, tidx) + res = run_one_test(pm, args, index, tidx) + tsr.add_resultdata(res) except PluginMgrTestFail as pmtf: ex_type, ex, ex_tb = sys.exc_info() stage = pmtf.stage message = pmtf.message output = pmtf.output + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg(pmtf.message) + res.set_failmsg(pmtf.output) + tsr.add_resultdata(res) + index += 1 print(message) print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'. format(ex_type, ex, index, tidx['id'], tidx['name'], stage)) @@ -314,16 +324,16 @@ def test_runner(pm, args, filtered_tests): # if we failed in setup or teardown, # fill in the remaining tests with ok-skipped count = index - if not args.notap: - tap += 'about to flush the tap output if tests need to be skipped\n' - if tcount + 1 != index: - for tidx in testlist[index - 1:]: - msg = 'skipped - previous {} failed'.format(stage) - tap += 'ok {} - {} # {} {} {}\n'.format( - count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) - count += 1 - tap += 'done flushing skipped test tap output\n' + if tcount + 1 != count: + for tidx in testlist[count - 1:]: + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + msg = 'skipped - previous {} failed {} {}'.format(stage, + index, badtest.get('id', '--Unknown--')) + res.set_errormsg(msg) + tsr.add_resultdata(res) + count += 1 if args.pause: print('Want to pause\nPress enter to continue ...') @@ -332,7 +342,7 @@ def test_runner(pm, args, filtered_tests): pm.call_post_suite(index) - return tap + return tsr def has_blank_ids(idlist): """ @@ -373,6 +383,10 @@ def set_args(parser): Set the command line arguments for tdc. """ parser.add_argument( + '--outfile', type=str, + help='Path to the file in which results should be saved. ' + + 'Default target is the current directory.') + parser.add_argument( '-p', '--path', type=str, help='The full path to the tc executable to use') sg = parser.add_argument_group( @@ -408,8 +422,9 @@ def set_args(parser): '-v', '--verbose', action='count', default=0, help='Show the commands that are being run') parser.add_argument( - '-N', '--notap', action='store_true', - help='Suppress tap results for command under test') + '--format', default='tap', const='tap', nargs='?', + choices=['none', 'xunit', 'tap'], + help='Specify the format for test results. (Default: TAP)') parser.add_argument('-d', '--device', help='Execute the test case in flower category') parser.add_argument( @@ -430,6 +445,8 @@ def check_default_settings(args, remaining, pm): NAMES['TC'] = args.path if args.device != None: NAMES['DEV2'] = args.device + if 'TIMEOUT' not in NAMES: + NAMES['TIMEOUT'] = None if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -624,12 +641,30 @@ def set_operation_mode(pm, args): if len(alltests): catresults = test_runner(pm, args, alltests) + if args.format == 'none': + print('Test results output suppression requested\n') + else: + print('\nAll test results: \n') + if args.format == 'xunit': + suffix = 'xml' + res = catresults.format_xunit() + elif args.format == 'tap': + suffix = 'tap' + res = catresults.format_tap() + print(res) + print('\n\n') + if not args.outfile: + fname = 'test-results.{}'.format(suffix) + else: + fname = args.outfile + with open(fname, 'w') as fh: + fh.write(res) + fh.close() + if os.getenv('SUDO_UID') is not None: + os.chown(fname, uid=int(os.getenv('SUDO_UID')), + gid=int(os.getenv('SUDO_GID'))) else: - catresults = 'No tests found\n' - if args.notap: - print('Tap output suppression requested\n') - else: - print('All test results: \n\n{}'.format(catresults)) + print('No tests found\n') def main(): """ diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index d651bc1501bd..6d91e48c2625 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -15,6 +15,8 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'BATCH_FILE': './batch.txt', + # Length of time in seconds to wait before terminating a command + 'TIMEOUT': 12, # Name of the namespace to use 'NS': 'tcut', # Directory containing eBPF test programs diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index fb22bccfbc8a..7ef45a4a3cba 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -23,6 +23,10 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK) +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + typedef unsigned long long phys_addr_t; typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; |