diff options
Diffstat (limited to 'tools')
46 files changed, 1045 insertions, 360 deletions
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 5a3022c8af82..0662f644aad9 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 790944c35602..baee8591ac76 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -30,7 +30,8 @@ CGROUP COMMANDS | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | | **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** } +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -106,6 +107,7 @@ DESCRIPTION **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 358c7309d419..fe1b38e7e887 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -44,7 +44,7 @@ PROG COMMANDS | **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | | **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** | +| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } | *ATTACH_TYPE* := { diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index d67518bcbd44..cc33c5824a2f 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -478,7 +478,7 @@ _bpftool() cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ - cgroup/setsockopt struct_ops \ + cgroup/setsockopt cgroup/sock_release struct_ops \ fentry fexit freplace sk_lookup" -- \ "$cur" ) ) return 0 @@ -1021,7 +1021,7 @@ _bpftool() device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ getpeername4 getpeername6 getsockname4 getsockname6 \ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt' + setsockopt sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' case $prev in @@ -1032,7 +1032,7 @@ _bpftool() ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ post_bind4|post_bind6|connect4|connect6|getpeername4|\ getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt) + recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index d901cc1b904a..6e53b1d393f4 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,7 +28,8 @@ " connect6 | getpeername4 | getpeername6 |\n" \ " getsockname4 | getsockname6 | sendmsg4 |\n" \ " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt }" + " sysctl | getsockopt | setsockopt |\n" \ + " sock_release }" static unsigned int query_flags; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 3f067d2d7584..da4846c9856a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv) " cgroup/getpeername4 | cgroup/getpeername6 |\n" " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" - " cgroup/getsockopt | cgroup/setsockopt |\n" + " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index cd72016c3cfa..715092fc6a23 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -51,39 +51,39 @@ subdir-obj-y := build-file := $(dir)/Build -include $(build-file) -quiet_cmd_flex = FLEX $@ -quiet_cmd_bison = BISON $@ +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ # Create directory unless it exists -quiet_cmd_mkdir = MKDIR $(dir $@) +quiet_cmd_mkdir = MKDIR $(dir $@) cmd_mkdir = mkdir -p $(dir $@) rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) # Compile command -quiet_cmd_cc_o_c = CC $@ +quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -quiet_cmd_host_cc_o_c = HOSTCC $@ +quiet_cmd_host_cc_o_c = HOSTCC $@ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $< -quiet_cmd_cxx_o_c = CXX $@ +quiet_cmd_cxx_o_c = CXX $@ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $< -quiet_cmd_cpp_i_c = CPP $@ +quiet_cmd_cpp_i_c = CPP $@ cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< -quiet_cmd_cc_s_c = AS $@ +quiet_cmd_cc_s_c = AS $@ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< -quiet_cmd_gen = GEN $@ +quiet_cmd_gen = GEN $@ # Link agregate command # If there's nothing to link, create empty $@ object. -quiet_cmd_ld_multi = LD $@ +quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) -quiet_cmd_host_ld_multi = HOSTLD $@ +quiet_cmd_host_ld_multi = HOSTLD $@ cmd_host_ld_multi = $(if $(strip $(obj-y)),\ $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -22,7 +22,7 @@ #include <linux/build_bug.h> #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -3,4 +3,12 @@ #include <vdso/const.h> +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index f44eb0a04afd..4c32e97dcdf0 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bf8143505c49..f92880a15645 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a3cf437814..c41d9b2b59ac 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3216,6 +3216,9 @@ static int add_dummy_ksym_var(struct btf *btf) const struct btf_var_secinfo *vs; const struct btf_type *sec; + if (!btf) + return 0; + sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, BTF_KIND_DATASEC); if (sec_btf_id < 0) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ee426226928f..acbcf6c7bdf8 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -41,6 +41,11 @@ #define ELF_C_READ_MMAP ELF_C_READ #endif +/* Older libelf all end up in this expression, for both 32 and 64 bit */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#endif + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 1dcec73c910c..bcf3eca5afbe 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -108,9 +108,9 @@ displayed as follows: perf script --itrace=ibxwpe -F+flags -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. +The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, +in transaction, VM-entry, and VM-exit respectively. perf script also supports higher level ways to dump instruction traces: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5b8b61075039..48a5f5b26dd4 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -183,14 +183,15 @@ OPTIONS At this point usage is displayed, and perf-script exits. The flags field is synthesized and may have a value when Instruction - Trace decoding. The flags are "bcrosyiABEx" which stand for branch, + Trace decoding. The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, system, asynchronous, interrupt, - transaction abort, trace begin, trace end, and in transaction, + transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit respectively. Known combinations of flags are printed more nicely e.g. "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", - "tr end" for "bE". However the "x" flag will be display separately in those + "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch". + However the "x" flag will be displayed separately in those cases e.g. "jcc (x)" for a condition branch within a transaction. The callindent field is synthesized and may have a value when diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 9974f5f8e49b..9cd1c34f31b5 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -443 n64 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 2e68fbb57cc6..8f052ff4058c 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 7e4a2aba366d..0690263df1dd 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index ecd551b08d05..ce18119ea0d0 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,7 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 87f5b1a4a7fa..833405c27dae 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID)) with_hits = true; + if (zstd_init(&(session->zstd_data), 0) < 0) + pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); + /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5a830ae09418..f9f74a514315 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -572,7 +572,8 @@ static int enable_counters(void) * - we have initial delay configured */ if (!target__none(&target) || stat_config.initial_delay) { - evlist__enable(evsel_list); + if (!all_counters_use_bpf) + evlist__enable(evsel_list); if (stat_config.initial_delay > 0) pr_info(EVLIST_ENABLED_MSG); } @@ -581,13 +582,19 @@ static int enable_counters(void) static void disable_counters(void) { + struct evsel *counter; + /* * If we don't have tracee (attaching to task or cpu), counters may * still be running. To get accurate group ratios, we must stop groups * from counting before reading their constituent counters. */ - if (!target__none(&target)) - evlist__disable(evsel_list); + if (!target__none(&target)) { + evlist__for_each_entry(evsel_list, counter) + bpf_counter__disable(counter); + if (!all_counters_use_bpf) + evlist__disable(evsel_list); + } } static volatile int workload_exec_errno; diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 7daa8bb70a5a..711d4f9f5645 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -91,6 +91,11 @@ from __future__ import print_function import sys +# Only change warnings if the python -W option was not used +if not sys.warnoptions: + import warnings + # PySide2 causes deprecation warnings, ignore them. + warnings.filterwarnings("ignore", category=DeprecationWarning) import argparse import weakref import threading @@ -125,8 +130,9 @@ if pyside_version_1: from PySide.QtGui import * from PySide.QtSql import * -from decimal import * -from ctypes import * +from decimal import Decimal, ROUND_HALF_UP +from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \ + c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong from multiprocessing import Process, Array, Value, Event # xrange is range in Python3 @@ -3868,7 +3874,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False): if with_hdr: model = indexes[0].model() for col in range(min_col, max_col + 1): - val = model.headerData(col, Qt.Horizontal) + val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole) if as_csv: text += sep + ToCSValue(val) sep = "," diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c index 76a53126efdf..d4b0ef74defc 100644 --- a/tools/perf/tests/pfm.c +++ b/tools/perf/tests/pfm.c @@ -131,8 +131,8 @@ static int test__pfm_group(void) }, { .events = "{},{instructions}", - .nr_events = 0, - .nr_groups = 0, + .nr_events = 1, + .nr_groups = 1, }, { .events = "{instructions},{instructions}", diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8a62fb39e365..19ad64f2bd83 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -100,7 +100,7 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, }; -#define PERF_IP_FLAG_CHARS "bcrosyiABEx" +#define PERF_IP_FLAG_CHARS "bcrosyiABExgh" #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6e5c41528c7d..6ea3e677dc1e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) if (affinity__setup(&affinity) < 0) return; - evlist__for_each_entry(evlist, pos) - bpf_counter__disable(pos); - /* Disable 'immediate' events last */ for (imm = 0; imm <= 1; imm++) { evlist__for_each_cpu(evlist, i, cpu) { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 8c59677bee13..20ad663978cc 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; decoder->state.type = INTEL_PT_TRANSACTION; + if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) + decoder->state.type |= INTEL_PT_BRANCH; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; @@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) return 0; if (err == -EAGAIN || intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { + bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - if (intel_pt_fup_event(decoder)) + if (intel_pt_fup_event(decoder) && no_tip) return 0; return -EAGAIN; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 8658d42ce57a..0dfec8761b9a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, *ip += intel_pt_insn->length; - if (to_ip && *ip == to_ip) + if (to_ip && *ip == to_ip) { + intel_pt_insn->length = 0; goto out_no_cache; + } if (*ip >= al.map->end) break; @@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, static void intel_pt_sample_flags(struct intel_pt_queue *ptq) { + ptq->insn_len = 0; if (ptq->state->flags & INTEL_PT_ABORT_TX) { ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; } else if (ptq->state->flags & INTEL_PT_ASYNC) { @@ -1211,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT; - ptq->insn_len = 0; } else { if (ptq->state->from_ip) ptq->flags = intel_pt_insn_type(ptq->state->insn_op); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4dad14265b81..84108c17f48d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { .symbol = "bpf-output", .alias = "", }, + [PERF_COUNT_SW_CGROUP_SWITCHES] = { + .symbol = "cgroup-switches", + .alias = "", + }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -2928,9 +2932,14 @@ restart: } for (i = 0; i < max; i++, syms++) { + /* + * New attr.config still not supported here, the latest + * example was PERF_COUNT_SW_CGROUP_SWITCHES + */ + if (syms->symbol == NULL) + continue; - if (event_glob != NULL && syms->symbol != NULL && - !(strglobmatch(syms->symbol, event_glob) || + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index fb8646cc3e83..923849024b15 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } +cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } /* * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index d735acb6c29c..6eef6dfeaa57 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, } /* no event */ - if (*q == '\0') + if (*q == '\0') { + if (*sep == '}') { + if (grp_evt < 0) { + ui__error("cannot close a non-existing event group\n"); + goto error; + } + grp_evt--; + } continue; + } memset(&attr, 0, sizeof(attr)); event_attr_init(&attr); @@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, grp_evt = -1; } } + free(p_orig); return 0; error: free(p_orig); diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index f9271f3ea912..071312f5eb92 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -131,29 +131,29 @@ QUIET_SUBDIR1 = ifneq ($(silent),1) ifneq ($(V),1) - QUIET_CC = @echo ' CC '$@; - QUIET_CC_FPIC = @echo ' CC FPIC '$@; - QUIET_CLANG = @echo ' CLANG '$@; - QUIET_AR = @echo ' AR '$@; - QUIET_LINK = @echo ' LINK '$@; - QUIET_MKDIR = @echo ' MKDIR '$@; - QUIET_GEN = @echo ' GEN '$@; + QUIET_CC = @echo ' CC '$@; + QUIET_CC_FPIC = @echo ' CC FPIC '$@; + QUIET_CLANG = @echo ' CLANG '$@; + QUIET_AR = @echo ' AR '$@; + QUIET_LINK = @echo ' LINK '$@; + QUIET_MKDIR = @echo ' MKDIR '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_SUBDIR0 = +@subdir= QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ - echo ' SUBDIR '$$subdir; \ + echo ' SUBDIR '$$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir - QUIET_FLEX = @echo ' FLEX '$@; - QUIET_BISON = @echo ' BISON '$@; - QUIET_GENSKEL = @echo ' GEN-SKEL '$@; + QUIET_FLEX = @echo ' FLEX '$@; + QUIET_BISON = @echo ' BISON '$@; + QUIET_GENSKEL = @echo ' GENSKEL '$@; descend = \ - +@echo ' DESCEND '$(1); \ + +@echo ' DESCEND '$(1); \ mkdir -p $(OUTPUT)$(1) && \ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) - QUIET_CLEAN = @printf ' CLEAN %s\n' $1; - QUIET_INSTALL = @printf ' INSTALL %s\n' $1; - QUIET_UNINST = @printf ' UNINST %s\n' $1; + QUIET_CLEAN = @printf ' CLEAN %s\n' $1; + QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 12ee40284da0..2060bc122c53 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -static int settimeo(int fd, int timeout_ms) +int settimeo(int fd, int timeout_ms) { struct timeval timeout = { .tv_sec = 3 }; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 7205f8afdba1..5e0d51c07b63 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,6 +33,7 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; +int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index de78617f6550..f9a8ae331963 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -86,8 +86,9 @@ void test_ringbuf(void) const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; long bg_ret = -1; - int err, cnt; + int err, cnt, rb_fd; int page_size = getpagesize(); + void *mmap_ptr, *tmp_ptr; skel = test_ringbuf__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) @@ -101,6 +102,52 @@ void test_ringbuf(void) if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; + rb_fd = bpf_map__fd(skel->maps.ringbuf); + /* good read/write cons_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"); + tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE); + if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend")) + goto cleanup; + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + /* bad writeable prod_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos"); + ASSERT_EQ(err, -EPERM, "wr_prod_pos_err"); + + /* bad writeable data pages */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one"); + ASSERT_EQ(err, -EPERM, "wr_data_page_one_err"); + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two"); + mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all"); + + /* good read-only pages */ + mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro"); + + /* good read-only pages with initial offset */ + mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro"); + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c new file mode 100644 index 000000000000..5703c918812b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link + * between src and dst. The netns fwd has veth links to each src and dst. The + * client is in src and server in dst. The test installs a TC BPF program to each + * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the + * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace + * switch from ingress side; it also installs a checker prog on the egress side + * to drop unexpected traffic. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <linux/limits.h> +#include <linux/sysctl.h> +#include <linux/if_tun.h> +#include <linux/if.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/mount.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_neigh_fib.skel.h" +#include "test_tc_neigh.skel.h" +#include "test_tc_peer.skel.h" + +#define NS_SRC "ns_src" +#define NS_FWD "ns_fwd" +#define NS_DST "ns_dst" + +#define IP4_SRC "172.16.1.100" +#define IP4_DST "172.16.2.100" +#define IP4_TUN_SRC "172.17.1.100" +#define IP4_TUN_FWD "172.17.1.200" +#define IP4_PORT 9004 + +#define IP6_SRC "0::1:dead:beef:cafe" +#define IP6_DST "0::2:dead:beef:cafe" +#define IP6_TUN_SRC "1::1:dead:beef:cafe" +#define IP6_TUN_FWD "1::2:dead:beef:cafe" +#define IP6_PORT 9006 + +#define IP4_SLL "169.254.0.1" +#define IP4_DLL "169.254.0.2" +#define IP4_NET "169.254.0.0" + +#define MAC_DST_FWD "00:11:22:33:44:55" +#define MAC_DST "00:22:33:44:55:66" + +#define IFADDR_STR_LEN 18 +#define PING_ARGS "-i 0.2 -c 3 -w 10 -q" + +#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" +#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" +#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" + +#define TIMEOUT_MILLIS 10000 + +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) + +static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; + +static int write_file(const char *path, const char *newval) +{ + FILE *f; + + f = fopen(path, "r+"); + if (!f) + return -1; + if (fwrite(newval, strlen(newval), 1, f) != 1) { + log_err("writing to %s failed", path); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +struct nstoken { + int orig_netns_fd; +}; + +static int setns_by_fd(int nsfd) +{ + int err; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + + if (!ASSERT_OK(err, "setns")) + return err; + + /* Switch /sys to the new namespace so that e.g. /sys/class/net + * reflects the devices in the new namespace. + */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "unshare")) + return err; + + err = umount2("/sys", MNT_DETACH); + if (!ASSERT_OK(err, "umount2 /sys")) + return err; + + err = mount("sysfs", "/sys", "sysfs", 0, NULL); + if (!ASSERT_OK(err, "mount /sys")) + return err; + + err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL); + if (!ASSERT_OK(err, "mount /sys/fs/bpf")) + return err; + + return 0; +} + +/** + * open_netns() - Switch to specified network namespace by name. + * + * Returns token with which to restore the original namespace + * using close_netns(). + */ +static struct nstoken *open_netns(const char *name) +{ + int nsfd; + char nspath[PATH_MAX]; + int err; + struct nstoken *token; + + token = malloc(sizeof(struct nstoken)); + if (!ASSERT_OK_PTR(token, "malloc token")) + return NULL; + + token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net")) + goto fail; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (!ASSERT_GE(nsfd, 0, "open netns fd")) + goto fail; + + err = setns_by_fd(nsfd); + if (!ASSERT_OK(err, "setns_by_fd")) + goto fail; + + return token; +fail: + free(token); + return NULL; +} + +static void close_netns(struct nstoken *token) +{ + ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd"); + free(token); +} + +static int netns_setup_namespaces(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns); + if (!ASSERT_OK(system(cmd), cmd)) + return -1; + ns++; + } + return 0; +} + +struct netns_setup_result { + int ifindex_veth_src_fwd; + int ifindex_veth_dst_fwd; +}; + +static int get_ifaddr(const char *name, char *ifaddr) +{ + char path[PATH_MAX]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); + if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +static int get_ifindex(const char *name) +{ + char path[PATH_MAX]; + char buf[32]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(buf, 1, sizeof(buf), f); + if (!ASSERT_GT(ret, 0, "fread ifindex")) { + fclose(f); + return -1; + } + fclose(f); + return atoi(buf); +} + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +static int netns_setup_links_and_routes(struct netns_setup_result *result) +{ + struct nstoken *nstoken = NULL; + char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; + + SYS("ip link add veth_src type veth peer name veth_src_fwd"); + SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + + SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS("ip link set veth_dst address " MAC_DST); + + if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + goto fail; + + result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); + if (result->ifindex_veth_src_fwd < 0) + goto fail; + result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); + if (result->ifindex_veth_dst_fwd < 0) + goto fail; + + SYS("ip link set veth_src netns " NS_SRC); + SYS("ip link set veth_src_fwd netns " NS_FWD); + SYS("ip link set veth_dst_fwd netns " NS_FWD); + SYS("ip link set veth_dst netns " NS_DST); + + /** setup in 'src' namespace */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto fail; + + SYS("ip addr add " IP4_SRC "/32 dev veth_src"); + SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS("ip link set dev veth_src up"); + + SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + + SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + + close_netns(nstoken); + + /** setup in 'fwd' namespace */ + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + goto fail; + + /* The fwd netns automatically gets a v6 LL address / routes, but also + * needs v4 one in order to start ARP probing. IP4_NET route is added + * to the endpoints so that the ARP processing will reply. + */ + SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS("ip link set dev veth_src_fwd up"); + SYS("ip link set dev veth_dst_fwd up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + + close_netns(nstoken); + + /** setup in 'dst' namespace */ + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) + goto fail; + + SYS("ip addr add " IP4_DST "/32 dev veth_dst"); + SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS("ip link set dev veth_dst up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + + SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + close_netns(nstoken); + + return 0; +fail: + if (nstoken) + close_netns(nstoken); + return -1; +} + +static int netns_load_bpf(void) +{ + SYS("tc qdisc add dev veth_src_fwd clsact"); + SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + return 0; +fail: + return -1; +} + +static void test_tcp(int family, const char *addr, __u16 port) +{ + int listen_fd = -1, accept_fd = -1, client_fd = -1; + char buf[] = "testing testing"; + int n; + struct nstoken *nstoken; + + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) + return; + + listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); + if (!ASSERT_GE(listen_fd, 0, "listen")) + goto done; + + close_netns(nstoken); + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto done; + + client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto done; + + if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) + goto done; + + n = write(client_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + + n = read(accept_fd, buf, sizeof(buf)); + ASSERT_EQ(n, sizeof(buf), "recv from server"); + +done: + if (nstoken) + close_netns(nstoken); + if (listen_fd >= 0) + close(listen_fd); + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); +} + +static int test_ping(int family, const char *addr) +{ + const char *ping = family == AF_INET6 ? "ping6" : "ping"; + + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); + return 0; +fail: + return -1; +} + +static void test_connectivity(void) +{ + test_tcp(AF_INET, IP4_DST, IP4_PORT); + test_ping(AF_INET, IP4_DST); + test_tcp(AF_INET6, IP6_DST, IP6_PORT); + test_ping(AF_INET6, IP6_DST); +} + +static int set_forwarding(bool enable) +{ + int err; + + err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) + return err; + + err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv6.forwarding=0")) + return err; + + return 0; +} + +static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) +{ + struct nstoken *nstoken = NULL; + struct test_tc_neigh_fib *skel = NULL; + int err; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + + skel = test_tc_neigh_fib__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) + goto done; + + if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) + goto done; + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + /* bpf_fib_lookup() checks if forwarding is enabled */ + if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) + goto done; + + test_connectivity(); + +done: + if (skel) + test_tc_neigh_fib__destroy(skel); + close_netns(nstoken); +} + +static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) +{ + struct nstoken *nstoken = NULL; + struct test_tc_neigh *skel = NULL; + int err; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + + skel = test_tc_neigh__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) + goto done; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_neigh__load(skel); + if (!ASSERT_OK(err, "test_tc_neigh__load")) + goto done; + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + + test_connectivity(); + +done: + if (skel) + test_tc_neigh__destroy(skel); + close_netns(nstoken); +} + +static void test_tc_redirect_peer(struct netns_setup_result *setup_result) +{ + struct nstoken *nstoken; + struct test_tc_peer *skel; + int err; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto done; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto done; + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + + test_connectivity(); + +done: + if (skel) + test_tc_peer__destroy(skel); + close_netns(nstoken); +} + +static int tun_open(char *name) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) + return -1; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TUN | IFF_NO_PI; + if (*name) + strncpy(ifr.ifr_name, name, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl TUNSETIFF")) + goto fail; + + SYS("ip link set dev %s up", name); + + return fd; +fail: + close(fd); + return -1; +} + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +enum { + SRC_TO_TARGET = 0, + TARGET_TO_SRC = 1, +}; + +static int tun_relay_loop(int src_fd, int target_fd) +{ + fd_set rfds, wfds; + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + + for (;;) { + char buf[1500]; + int direction, nread, nwrite; + + FD_SET(src_fd, &rfds); + FD_SET(target_fd, &rfds); + + if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { + log_err("select failed"); + return 1; + } + + direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; + + nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); + if (nread < 0) { + log_err("read failed"); + return 1; + } + + nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); + if (nwrite != nread) { + log_err("write failed"); + return 1; + } + } +} + +static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) +{ + struct test_tc_peer *skel = NULL; + struct nstoken *nstoken = NULL; + int err; + int tunnel_pid = -1; + int src_fd, target_fd; + int ifindex; + + /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. + * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those + * expose the L2 headers encapsulating the IP packet to BPF and hence + * don't have skb in suitable state for this test. Alternative to TUN/TAP + * would be e.g. Wireguard which would appear as a pure L3 device to BPF, + * but that requires much more complicated setup. + */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) + return; + + src_fd = tun_open("tun_src"); + if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) + goto fail; + + close_netns(nstoken); + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) + goto fail; + + target_fd = tun_open("tun_fwd"); + if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) + goto fail; + + tunnel_pid = fork(); + if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) + goto fail; + + if (tunnel_pid == 0) + exit(tun_relay_loop(src_fd, target_fd)); + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto fail; + + ifindex = get_ifindex("tun_fwd"); + if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) + goto fail; + + skel->rodata->IFINDEX_SRC = ifindex; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto fail; + + err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto fail; + + /* Load "tc_src_l3" to the tun_fwd interface to redirect packets + * towards dst, and "tc_dst" to redirect packets + * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + */ + SYS("tc qdisc add dev tun_fwd clsact"); + SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + /* Setup route and neigh tables */ + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + + SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + + SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto fail; + + test_connectivity(); + +fail: + if (tunnel_pid > 0) { + kill(tunnel_pid, SIGTERM); + waitpid(tunnel_pid, NULL, 0); + } + if (src_fd >= 0) + close(src_fd); + if (target_fd >= 0) + close(target_fd); + if (skel) + test_tc_peer__destroy(skel); + if (nstoken) + close_netns(nstoken); +} + +#define RUN_TEST(name) \ + ({ \ + struct netns_setup_result setup_result; \ + if (test__start_subtest(#name)) \ + if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ + if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ + "setup links and routes")) \ + test_ ## name(&setup_result); \ + netns_setup_namespaces("delete"); \ + } \ + }) + +static void *test_tc_redirect_run_tests(void *arg) +{ + RUN_TEST(tc_redirect_peer); + RUN_TEST(tc_redirect_peer_l3); + RUN_TEST(tc_redirect_neigh); + RUN_TEST(tc_redirect_neigh_fib); + return NULL; +} + +void test_tc_redirect(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index b985ac4e7a81..0c93d326a663 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -33,17 +33,8 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif -enum { - dev_src, - dev_dst, -}; - -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) @@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb) return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c index d82ed3457030..f7ab69cf018e 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, return 0; } -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb) /* these are identical, but keep them separate for compatibility with the * section names expected by test_tc_redirect.sh */ -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { return tc_redir(skb); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { return tc_redir(skb); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index fc84a7685aa2..fe818cd5f010 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -5,41 +5,59 @@ #include <linux/bpf.h> #include <linux/stddef.h> #include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/ip.h> #include <bpf/bpf_helpers.h> -enum { - dev_src, - dev_dst, -}; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; +static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}; -static __always_inline int get_dev_ifindex(int which) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + return TC_ACT_SHOT; +} - return ifindex ? *ifindex : 0; +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) +{ + return bpf_redirect_peer(IFINDEX_SRC, 0); } -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { - return TC_ACT_SHOT; + return bpf_redirect_peer(IFINDEX_DST, 0); } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress_l3") +int tc_dst_l3(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); + return bpf_redirect(IFINDEX_SRC, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress_l3") +int tc_src_l3(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); + __u16 proto = skb->protocol; + + if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0) + return TC_ACT_SHOT; + + return bpf_redirect_peer(IFINDEX_DST, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh deleted file mode 100755 index 8868aa1ca902..000000000000 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ /dev/null @@ -1,216 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the -# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace -# switch from ingress side; it also installs a checker prog on the egress side -# to drop unexpected traffic. - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that needed tools are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi -command -v perl >/dev/null 2>&1 || \ - { echo >&2 "perl is not available"; exit 1; } -command -v jq >/dev/null 2>&1 || \ - { echo >&2 "jq is not available"; exit 1; } -command -v bpftool >/dev/null 2>&1 || \ - { echo >&2 "bpftool is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -netns_cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -netns_setup() -{ - ip netns add "${NS_SRC}" - ip netns add "${NS_FWD}" - ip netns add "${NS_DST}" - - ip link add veth_src type veth peer name veth_src_fwd - ip link add veth_dst type veth peer name veth_dst_fwd - - ip link set veth_src netns ${NS_SRC} - ip link set veth_src_fwd netns ${NS_FWD} - - ip link set veth_dst netns ${NS_DST} - ip link set veth_dst_fwd netns ${NS_FWD} - - ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src - ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - - # The fwd netns automatically get a v6 LL address / routes, but also - # needs v4 one in order to start ARP probing. IP4_NET route is added - # to the endpoints so that the ARP processing will reply. - - ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd - ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - - ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad - ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - - ip -netns ${NS_SRC} link set dev veth_src up - ip -netns ${NS_FWD} link set dev veth_src_fwd up - - ip -netns ${NS_DST} link set dev veth_dst up - ip -netns ${NS_FWD} link set dev veth_dst_fwd up - - ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global - ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - - ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - - ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global - ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - - ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - - fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) - fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - - ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - - ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst -} - -netns_test_connectivity() -{ - set +e - - ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" - ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - - TEST="TCPv4 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="TCPv6 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv4 connectivity test" - ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - set -e -} - -hex_mem_str() -{ - perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 -} - -netns_setup_bpf() -{ - local obj=$1 - local use_forwarding=${2:-0} - - ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress - - ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress - - if [ "$use_forwarding" -eq "1" ]; then - # bpf_fib_lookup() checks if forwarding is enabled - ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 - return 0 - fi - - veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) - veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) - - progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') - for prog in $progs; do - map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') - if [ ! -z "$map" ]; then - bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) - bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) - fi - done -} - -trap netns_cleanup EXIT -set -e - -netns_setup -netns_setup_bpf test_tc_neigh.o -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_neigh_fib.o 1 -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_peer.o -netns_test_connectivity diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 07eaa04412ae..8ab94d65f3d5 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -295,8 +295,6 @@ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid write to stack R1 off=0 size=1", .result = ACCEPT, .retval = 42, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index e5913fd3b903..7ae2859d495c 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -300,8 +300,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -371,8 +369,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -472,8 +468,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -766,8 +760,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index cf69b2fcce59..dd61118df66e 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ $(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ $(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ $(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore new file mode 100644 index 000000000000..448eeb4590fc --- /dev/null +++ b/tools/testing/selftests/nci/.gitignore @@ -0,0 +1 @@ +/nci_dev diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 78ddf5e11625..8e83cf91513a 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -43,7 +43,7 @@ static struct { siginfo_t first_siginfo; /* First observed siginfo_t. */ } ctx; -/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */ #define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) @@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98c3b647f54d..e3d5c77a8612 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET_SET(_regs, _val) \ do { \ typeof(_val) _result = (_val); \ - /* \ - * A syscall error is signaled by CR0 SO bit \ - * and the code is stored as a positive value. \ - */ \ - if (_result < 0) { \ - SYSCALL_RET(_regs) = -_result; \ - (_regs).ccr |= 0x10000000; \ - } else { \ + if ((_regs.trap & 0xfff0) == 0x3000) { \ + /* \ + * scv 0 system call uses -ve result \ + * for error, so no need to adjust. \ + */ \ SYSCALL_RET(_regs) = _result; \ - (_regs).ccr &= ~0x10000000; \ + } else { \ + /* \ + * A syscall error is signaled by the \ + * CR0 SO bit and the code is stored as \ + * a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ } \ } while (0) # define SYSCALL_RET_SET_ON_PTRACE_EXIT diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index 1cda2e11b3ad..773c5027553d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -9,11 +9,11 @@ "setup": [ "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536", - "expExitCode": "2", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536", + "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc", - "matchCount": "0", + "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536", + "matchCount": "1", "teardown": [ "$IP link del dev $DUMMY" ] |