diff options
Diffstat (limited to 'tools')
114 files changed, 2446 insertions, 882 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 100ad3dc091a..3feac0482fe9 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -196,22 +196,22 @@ static int get_family_id(int sd) static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n" - "IO %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "SWAP %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "RECLAIM %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "THRASHING%12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "COMPACT %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "WPCOPY %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "IRQ %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n", + printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "IO %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "SWAP %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "RECLAIM %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "THRASHING%12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "COMPACT %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "WPCOPY %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "IRQ %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n", "count", "real total", "virtual total", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->cpu_count, diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile index 7b42feedf647..de4f1b66ef01 100644 --- a/tools/arch/arm64/tools/Makefile +++ b/tools/arch/arm64/tools/Makefile @@ -13,12 +13,6 @@ AWK ?= awk MKDIR ?= mkdir RM ?= rm -ifeq ($(V),1) -Q = -else -Q = @ -endif - arm64_tools_dir = $(top_srcdir)/arch/arm64/tools arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 243b79f2b451..062bbd6cd048 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -27,12 +27,6 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - FEATURE_USER = .bpf FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled FEATURE_DISPLAY = libbfd diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 4315652678b9..bf843f328812 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -5,12 +5,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man8dir = $(mandir)/man8 diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dd9f3ec84201..6ea4823b770c 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 4b8079f294f6..afbddea3a39c 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch srctree := $(abspath $(CURDIR)/../../../) ifeq ($(V),1) - Q = msg = else - Q = @ ifeq ($(silent),1) msg = else diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index c4f1f1735af6..e49203ebd48c 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -26,10 +26,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ $(wildcard $(VMLINUX_BTF_PATHS)))) -ifeq ($(V),1) -Q = -else -Q = @ +ifneq ($(V),1) MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 endif diff --git a/tools/build/Makefile b/tools/build/Makefile index 18ad131f6ea7..63ef21878761 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -17,13 +17,7 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) export HOSTCC HOSTLD HOSTAR -ifeq ($(V),1) - Q = -else - Q = @ -endif - -export Q srctree CC LD +export srctree CC LD MAKEFLAGS := --no-print-directory build := -f $(srctree)/tools/build/Makefile.build dir=. obj diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 857a5f7b413d..168140f8e646 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -53,13 +53,6 @@ include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - INCLUDES = -I$(or $(OUTPUT),.) \ -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include @@ -96,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 3a9b2140aa04..e9a7ac2c062e 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - TEST_ARGS := $(if $(V),-v) # Set compile option CFLAGS diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 8890fd57b110..a1f5e388644d 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index bcac7ebfb51f..d7e5e8902af8 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -24,8 +24,8 @@ #include <signal.h> #include <inttypes.h> #include <sys/types.h> -#include <sys/errno.h> -#include <sys/fcntl.h> +#include <errno.h> +#include <fcntl.h> #include <sys/mount.h> #include <sys/statfs.h> #include <sys/mman.h> diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index f56e27727534..7a65948892e5 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -46,12 +46,6 @@ HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" AWK = awk MKDIR = mkdir -ifeq ($(V),1) - Q = -else - Q = @ -endif - BUILD_ORC := n ifeq ($(SRCARCH),x86) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 753dbc4f8198..ce973d9d8e6d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || (strstr(func->name, "_4core5slice5index24slice_") && str_ends_with(func->name, "_fail")); @@ -1975,6 +1976,14 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, reloc_addend(reloc) == pfunc->offset) break; + /* + * Clang sometimes leaves dangling unused jump table entries + * which point to the end of the function. Ignore them. + */ + if (reloc->sym->sec == pfunc->sec && + reloc_addend(reloc) == pfunc->offset + pfunc->len) + goto next; + dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); if (!dest_insn) break; @@ -1992,6 +2001,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, alt->insn = dest_insn; alt->next = insn->alts; insn->alts = alt; +next: prev_offset = reloc_offset(reloc); } @@ -2264,7 +2274,7 @@ static int read_annotate(struct objtool_file *file, if (sec->sh.sh_entsize != 8) { static bool warned = false; - if (!warned) { + if (!warned && opts.verbose) { WARN("%s: dodgy linker, sh_entsize != 8", sec->name); warned = true; } @@ -2462,13 +2472,14 @@ static void mark_rodata(struct objtool_file *file) * * - .rodata: can contain GCC switch tables * - .rodata.<func>: same, if -fdata-sections is being used - * - .rodata..c_jump_table: contains C annotated jump tables + * - .data.rel.ro.c_jump_table: contains C annotated jump tables * * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && + !strstr(sec->name, ".str1.")) || + !strncmp(sec->name, ".data.rel.ro", 12)) { sec->rodata = true; found = true; } diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index e7ee7ffccefd..e049679bb17b 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -10,7 +10,7 @@ #include <objtool/check.h> #include <objtool/elf.h> -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" +#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table" struct special_alt { struct list_head list; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index b2174894f9f7..6bb7edda3094 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,7 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) -NORETURN(bch2_trans_unlocked_error) +NORETURN(bch2_trans_unlocked_or_in_restart_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 55d6ce9ea52f..05c083bb1122 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -161,47 +161,6 @@ export VPATH SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source) endif -# Beautify output -# --------------------------------------------------------------------------- -# -# Most of build commands in Kbuild start with "cmd_". You can optionally define -# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from -# that command is printed by default. -# -# e.g.) -# quiet_cmd_depmod = DEPMOD $(MODLIB) -# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) -# -# A simple variant is to prefix commands with $(Q) - that's useful -# for commands that shall be hidden in non-verbose mode. -# -# $(Q)$(MAKE) $(build)=scripts/basic -# -# To put more focus on warnings, be less verbose as default -# Use 'make V=1' to see the full commands - -ifeq ($(V),1) - quiet = - Q = -else - quiet=quiet_ - Q=@ -endif - -# If the user is running make -s (silent mode), suppress echoing of commands -# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. -ifeq ($(filter 3.%,$(MAKE_VERSION)),) -short-opts := $(firstword -$(MAKEFLAGS)) -else -short-opts := $(filter-out --%,$(MAKEFLAGS)) -endif - -ifneq ($(findstring s,$(short-opts)),) - quiet=silent_ -endif - -export quiet Q - # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f3e15e9efa76..7849405614b1 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -270,8 +270,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) -void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; -void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0) + +int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0) + struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, @@ -404,6 +412,17 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) return (const struct cpumask *)mask; } +/* + * Return true if task @p cannot migrate to a different CPU, false + * otherwise. + */ +static inline bool is_migration_disabled(const struct task_struct *p) +{ + if (bpf_core_field_exists(p->migration_disabled)) + return p->migration_disabled; + return false; +} + /* rcu */ void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -421,7 +440,7 @@ void bpf_rcu_read_unlock(void) __ksym; */ static inline s64 time_delta(u64 after, u64 before) { - return (s64)(after - before) > 0 ? : 0; + return (s64)(after - before) > 0 ? (s64)(after - before) : 0; } /** diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0aa4005017c7..45f4abef7064 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -136,6 +136,33 @@ else NO_SUBDIR = : endif +# Beautify output +# --------------------------------------------------------------------------- +# +# Most of build commands in Kbuild start with "cmd_". You can optionally define +# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from +# that command is printed by default. +# +# e.g.) +# quiet_cmd_depmod = DEPMOD $(MODLIB) +# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) +# +# A simple variant is to prefix commands with $(Q) - that's useful +# for commands that shall be hidden in non-verbose mode. +# +# $(Q)$(MAKE) $(build)=scripts/basic +# +# To put more focus on warnings, be less verbose as default +# Use 'make V=1' to see the full commands + +ifeq ($(V),1) + quiet = + Q = +else + quiet = quiet_ + Q = @ +endif + # If the user is running make -s (silent mode), suppress echoing of commands # make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. ifeq ($(filter 3.%,$(MAKE_VERSION)),) @@ -146,8 +173,11 @@ endif ifneq ($(findstring s,$(short-opts)),) silent=1 + quiet=silent_ endif +export quiet Q + # # Define a callable command for descending to a new directory # diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph index f14bdfedee8f..b6196ee5065a 100755 --- a/tools/sound/dapm-graph +++ b/tools/sound/dapm-graph @@ -10,7 +10,7 @@ set -eu STYLE_COMPONENT_ON="color=dodgerblue;style=bold" STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90" -STYLE_NODE_ON="shape=box,style=bold,color=green4" +STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white" STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95" # Print usage and exit diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs index eb6a4fea8c79..f7f9e7088bb3 100644 --- a/tools/testing/selftests/bpf/Makefile.docs +++ b/tools/testing/selftests/bpf/Makefile.docs @@ -7,12 +7,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man2dir = $(mandir)/man2 diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index 66191ae9863c..79c3ccadb962 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd, static void fetch_and_validate(int outer_map_fd, struct bpf_map_batch_opts *opts, - __u32 batch_size, bool delete_entries) + __u32 batch_size, bool delete_entries, + bool has_holes) { - __u32 *fetched_keys, *fetched_values, total_fetched = 0; - __u32 batch_key = 0, fetch_count, step_size; - int err, max_entries = OUTER_MAP_ENTRIES; + int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes; + __u32 *fetched_keys, *fetched_values, total_fetched = 0, i; + __u32 batch_key = 0, fetch_count, step_size = batch_size; __u32 value_size = sizeof(__u32); /* Total entries needs to be fetched */ @@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd, "Memory allocation failed for fetched_keys or fetched_values", "error=%s\n", strerror(errno)); - for (step_size = batch_size; - step_size <= max_entries; - step_size += batch_size) { + /* hash map may not always return full batch */ + for (i = 0; i < OUTER_MAP_ENTRIES; i++) { fetch_count = step_size; err = delete_entries ? bpf_map_lookup_and_delete_batch(outer_map_fd, @@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd, if (err && errno == ENOSPC) { /* Fetch again with higher batch size */ total_fetched = 0; + step_size += batch_size; continue; } @@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd, } static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, - enum bpf_map_type inner_map_type) + enum bpf_map_type inner_map_type, + bool has_holes) { + __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes; __u32 *outer_map_keys, *inner_map_fds; - __u32 max_entries = OUTER_MAP_ENTRIES; LIBBPF_OPTS(bpf_map_batch_opts, opts); __u32 value_size = sizeof(__u32); int batch_size[2] = {5, 10}; __u32 map_index, op_index; int outer_map_fd, ret; - outer_map_keys = calloc(max_entries, value_size); - inner_map_fds = calloc(max_entries, value_size); + outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size); + inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size); CHECK((!outer_map_keys || !inner_map_fds), "Memory allocation failed for outer_map_keys or inner_map_fds", "error=%s\n", strerror(errno)); @@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) ? 9 : 1000) - map_index; + /* This condition is only meaningful for array of maps. + * + * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say + * max_entries is short for n, then outer_map_keys looks like: + * + * [n, n-1, ... 2, 1] + * + * We change it to + * + * [n, n-1, ... 2, 0] + * + * So it will leave key 1 as a hole. It will serve to test the + * correctness when batch on an array: a "non-exist" key might be + * actually allocated and returned from key iteration. + */ + if (has_holes) + outer_map_keys[max_entries - 1]--; + /* batch operation - map_update */ ret = bpf_map_update_batch(outer_map_fd, outer_map_keys, inner_map_fds, &max_entries, &opts); @@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, /* batch operation - map_lookup */ for (op_index = 0; op_index < 2; ++op_index) fetch_and_validate(outer_map_fd, &opts, - batch_size[op_index], false); + batch_size[op_index], false, + has_holes); /* batch operation - map_lookup_delete */ if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS) fetch_and_validate(outer_map_fd, &opts, - max_entries, true /*delete*/); + max_entries, true /*delete*/, + has_holes); /* close all map fds */ - for (map_index = 0; map_index < max_entries; map_index++) + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) close(inner_map_fds[map_index]); close(outer_map_fd); @@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, void test_map_in_map_batch_ops_array(void) { - _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); printf("%s:PASS with inner ARRAY map\n", __func__); - _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false); printf("%s:PASS with inner HASH map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true); + printf("%s:PASS with inner ARRAY map with holes\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true); + printf("%s:PASS with inner HASH map with holes\n", __func__); } void test_map_in_map_batch_ops_hash(void) { - _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); printf("%s:PASS with inner ARRAY map\n", __func__); - _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false); printf("%s:PASS with inner HASH map\n", __func__); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c index 3729fbfd3084..80b153d3ddec 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c @@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd) static int set_port_drop(int pf, bool multi_port) { + char dst_port[16]; + + snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER); + SYS(fail, "tc qdisc add dev lo ingress"); - SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s", + SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s", "dev lo", "parent FFFF:", "protocol", pf == PF_INET6 ? "ipv6" : "ip", @@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port) "flower", "ip_proto udp", "src_port", multi_port ? "8-10" : "9", + "dst_port", dst_port, "action drop"); return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 884ad87783d5..1e3e4392dcca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -111,31 +111,35 @@ out: static void test_sockmap_vsock_delete_on_close(void) { - int err, c, p, map; - const int zero = 0; - - err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); - if (!ASSERT_OK(err, "create_pair(AF_VSOCK)")) - return; + int map, c, p, err, zero = 0; map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); - if (!ASSERT_GE(map, 0, "bpf_map_create")) { - close(c); - goto out; - } + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return; - err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); - close(c); - if (!ASSERT_OK(err, "bpf_map_update")) - goto out; + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto close_map; + + if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST)) + goto close_socks; + + xclose(c); + xclose(p); + + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto close_map; - err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); ASSERT_OK(err, "after close(), bpf_map_update"); -out: - close(p); - close(map); +close_socks: + xclose(c); + xclose(p); +close_map: + xclose(map); } static void test_skmsg_helpers(enum bpf_map_type map_type) @@ -522,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void) if (!ASSERT_EQ(err, 1, "epoll_wait(fd)")) goto out_close; - n = recv(c1, &b, 1, SOCK_NONBLOCK); - ASSERT_EQ(n, 0, "recv_timeout(fin)"); + n = recv(c1, &b, 1, MSG_DONTWAIT); + ASSERT_EQ(n, 0, "recv(fin)"); out_close: close(c1); close(p1); @@ -531,57 +535,6 @@ out: test_sockmap_pass_prog__destroy(skel); } -static void test_sockmap_stream_pass(void) -{ - int zero = 0, sent, recvd; - int verdict, parser; - int err, map; - int c = -1, p = -1; - struct test_sockmap_pass_prog *pass = NULL; - char snd[256] = "0123456789"; - char rcv[256] = "0"; - - pass = test_sockmap_pass_prog__open_and_load(); - verdict = bpf_program__fd(pass->progs.prog_skb_verdict); - parser = bpf_program__fd(pass->progs.prog_skb_parser); - map = bpf_map__fd(pass->maps.sock_map_rx); - - err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); - if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) - goto out; - - err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) - goto out; - - err = create_pair(AF_INET, SOCK_STREAM, &c, &p); - if (err) - goto out; - - /* sk_data_ready of 'p' will be replaced by strparser handler */ - err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) - goto out_close; - - /* - * as 'prog_skb_parser' return the original skb len and - * 'prog_skb_verdict' return SK_PASS, the kernel will just - * pass it through to original socket 'p' - */ - sent = xsend(c, snd, sizeof(snd), 0); - ASSERT_EQ(sent, sizeof(snd), "xsend(c)"); - - recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK, - IO_TIMEOUT_SEC); - ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)"); - -out_close: - close(c); - close(p); - -out: - test_sockmap_pass_prog__destroy(pass); -} static void test_sockmap_skb_verdict_fionread(bool pass_prog) { @@ -628,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) ASSERT_EQ(avail, expected, "ioctl(FIONREAD)"); /* On DROP test there will be no data to read */ if (pass_prog) { - recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC); + recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC); ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)"); } @@ -1061,6 +1014,34 @@ destroy: test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_vsock_unconnected(void) +{ + struct sockaddr_storage addr; + int map, s, zero = 0; + socklen_t alen; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return; + + s = xsocket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) + goto close_map; + + /* Fail connect(), but trigger transport assignment. */ + init_addr_loopback(AF_VSOCK, &addr, &alen); + if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect")) + goto close_sock; + + ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update"); + +close_sock: + xclose(s); +close_map: + xclose(map); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -1101,8 +1082,6 @@ void test_sockmap_basic(void) test_sockmap_progs_query(BPF_SK_SKB_VERDICT); if (test__start_subtest("sockmap skb_verdict shutdown")) test_sockmap_skb_verdict_shutdown(); - if (test__start_subtest("sockmap stream parser and verdict pass")) - test_sockmap_stream_pass(); if (test__start_subtest("sockmap skb_verdict fionread")) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) @@ -1127,4 +1106,6 @@ void test_sockmap_basic(void) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); if (test__start_subtest("sockmap skb_verdict vsock poll")) test_sockmap_skb_verdict_vsock_poll(); + if (test__start_subtest("sockmap vsock unconnected")) + test_sockmap_vsock_unconnected(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c new file mode 100644 index 000000000000..621b3b71888e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <netinet/tcp.h> +#include <test_progs.h> +#include "sockmap_helpers.h" +#include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_strp.skel.h" + +#define STRP_PKT_HEAD_LEN 4 +#define STRP_PKT_BODY_LEN 6 +#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN) + +static const char packet[STRP_PKT_FULL_LEN] = "head+body\0"; +static const int test_packet_num = 100; + +/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready + * with sk held if an skb exists in sk_receive_queue. Then for the + * data_ready implementation of strparser, it will delay the read + * operation if sk is held and EAGAIN is returned. + */ +static int sockmap_strp_consume_pre_data(int p) +{ + int recvd; + bool retried = false; + char rcv[10]; + +retry: + errno = 0; + recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1); + if (recvd < 0 && errno == EAGAIN && retried == false) { + /* On the first call, EAGAIN will certainly be returned. + * A 1-second wait is enough for the workqueue to finish. + */ + sleep(1); + retried = true; + goto retry; + } + + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "data mismatch")) + return -1; + return 0; +} + +static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass, + bool need_parser) +{ + struct test_sockmap_strp *strp = NULL; + int verdict, parser; + int err; + + strp = test_sockmap_strp__open_and_load(); + *out_map = bpf_map__fd(strp->maps.sock_map); + + if (need_parser) + parser = bpf_program__fd(strp->progs.prog_skb_parser_partial); + else + parser = bpf_program__fd(strp->progs.prog_skb_parser); + + if (pass) + verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass); + else + verdict = bpf_program__fd(strp->progs.prog_skb_verdict); + + err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) + goto err; + + err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto err; + + return strp; +err: + test_sockmap_strp__destroy(strp); + return NULL; +} + +/* Dispatch packets to different socket by packet size: + * + * ------ ------ + * | pkt4 || pkt1 |... > remote socket + * ------ ------ / ------ ------ + * | pkt8 | pkt7 |... + * ------ ------ \ ------ ------ + * | pkt3 || pkt2 |... > local socket + * ------ ------ + */ +static void test_sockmap_strp_dispatch_pkt(int family, int sotype) +{ + int i, j, zero = 0, one = 1, recvd; + int err, map; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct test_sockmap_strp *strp = NULL; + int test_cnt = 6; + char rcv[10]; + struct { + char data[7]; + int data_len; + int send_cnt; + int *receiver; + } send_dir[2] = { + /* data expected to deliver to local */ + {"llllll", 6, 0, &p0}, + /* data expected to deliver to remote */ + {"rrrrr", 5, 0, &c1} + }; + + strp = sockmap_strp_init(&map, false, false); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto out_close; + + err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto out_close; + + err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)")) + goto out_close; + + /* deliver data with data size greater than 5 to local */ + strp->data->verdict_max_size = 5; + + for (i = 0; i < test_cnt; i++) { + int d = i % 2; + + xsend(c0, send_dir[d].data, send_dir[d].data_len, 0); + send_dir[d].send_cnt++; + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < send_dir[i].send_cnt; j++) { + int expected = send_dir[i].data_len; + + recvd = recv_timeout(*send_dir[i].receiver, rcv, + expected, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, expected, "recv_timeout()")) + goto out_close; + if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd), + "data mismatch")) + goto out_close; + } + } +out_close: + close(c0); + close(c1); + close(p0); + close(p1); +out: + test_sockmap_strp__destroy(strp); +} + +/* We have multiple packets in one skb + * ------------ ------------ ------------ + * | packet1 | packet2 | ... + * ------------ ------------ ------------ + */ +static void test_sockmap_strp_multiple_pkt(int family, int sotype) +{ + int i, zero = 0; + int sent, recvd, total; + int err, map; + int c = -1, p = -1; + struct test_sockmap_strp *strp = NULL; + char *snd = NULL, *rcv = NULL; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)")) + goto out_close; + + /* construct multiple packets in one buffer */ + total = test_packet_num * STRP_PKT_FULL_LEN; + snd = malloc(total); + rcv = malloc(total + 1); + if (!ASSERT_TRUE(snd, "malloc(snd)") || + !ASSERT_TRUE(rcv, "malloc(rcv)")) + goto out_close; + + for (i = 0; i < test_packet_num; i++) { + memcpy(snd + i * STRP_PKT_FULL_LEN, + packet, STRP_PKT_FULL_LEN); + } + + sent = xsend(c, snd, total, 0); + if (!ASSERT_EQ(sent, total, "xsend(c)")) + goto out_close; + + /* try to recv one more byte to avoid truncation check */ + recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, total, "recv(rcv)")) + goto out_close; + + /* we sent TCP segment with multiple encapsulation + * then check whether packets are handled correctly + */ + if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch")) + goto out_close; + +out_close: + close(c); + close(p); + if (snd) + free(snd); + if (rcv) + free(rcv); +out: + test_sockmap_strp__destroy(strp); +} + +/* Test strparser with partial read */ +static void test_sockmap_strp_partial_read(int family, int sotype) +{ + int zero = 0, recvd, off; + int err, map; + int c = -1, p = -1; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)")) + goto out_close; + + /* 1.1 send partial head, 1 byte header left */ + off = STRP_PKT_HEAD_LEN - 1; + xsend(c, packet, off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data")) + goto out_close; + + /* 1.2 send remaining head and body */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data")) + goto out_close; + + /* 2.1 send partial head, 1 byte header left */ + off = STRP_PKT_HEAD_LEN - 1; + xsend(c, packet, off, 0); + + /* 2.2 send remaining head and partial body, 1 byte body left */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0); + off = STRP_PKT_FULL_LEN - 1; + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data")) + goto out_close; + + /* 2.3 send remaining body */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data")) + goto out_close; + +out_close: + close(c); + close(p); + +out: + test_sockmap_strp__destroy(strp); +} + +/* Test simple socket read/write with strparser + FIONREAD */ +static void test_sockmap_strp_pass(int family, int sotype, bool fionread) +{ + int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail; + int err, map; + int c = -1, p = -1; + int test_cnt = 10, i; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + /* inject some data before bpf process, it should be read + * correctly because we check sk_receive_queue in + * tcp_bpf_recvmsg_parser(). + */ + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)")) + goto out_close; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out_close; + + /* consume previous data we injected */ + if (sockmap_strp_consume_pre_data(p)) + goto out_close; + + /* Previously, we encountered issues such as deadlocks and + * sequence errors that resulted in the inability to read + * continuously. Therefore, we perform multiple iterations + * of testing here. + */ + for (i = 0; i < test_cnt; i++) { + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "xsend(c)")) + goto out_close; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") || + !ASSERT_OK(memcmp(packet, rcv, pkt_size), + "memcmp, data mismatch")) + goto out_close; + } + + if (fionread) { + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)")) + goto out_close; + + err = ioctl(p, FIONREAD, &avail); + if (!ASSERT_OK(err, "ioctl(FIONREAD) error") || + !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)")) + goto out_close; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") || + !ASSERT_OK(memcmp(packet, rcv, pkt_size), + "second memcmp, data mismatch")) + goto out_close; + } + +out_close: + close(c); + close(p); + +out: + test_sockmap_strp__destroy(strp); +} + +/* Test strparser with verdict mode */ +static void test_sockmap_strp_verdict(int family, int sotype) +{ + int zero = 0, one = 1, sent, recvd, off; + int err, map; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, false, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + /* We simulate a reverse proxy server. + * When p0 receives data from c0, we forward it to c1. + * From c1's perspective, it will consider this data + * as being sent by p1. + */ + err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto out_close; + + err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto out_close; + + sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0); + if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)")) + goto out_close; + + recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "received data does not match the sent data")) + goto out_close; + + /* send again to ensure the stream is functioning correctly. */ + sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0); + if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)")) + goto out_close; + + /* partial read */ + off = STRP_PKT_FULL_LEN / 2; + recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "partial received data does not match the sent data")) + goto out_close; + +out_close: + close(c0); + close(c1); + close(p0); + close(p1); +out: + test_sockmap_strp__destroy(strp); +} + +void test_sockmap_strp(void) +{ + if (test__start_subtest("sockmap strp tcp pass")) + test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false); + if (test__start_subtest("sockmap strp tcp v6 pass")) + test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false); + if (test__start_subtest("sockmap strp tcp pass fionread")) + test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true); + if (test__start_subtest("sockmap strp tcp v6 pass fionread")) + test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true); + if (test__start_subtest("sockmap strp tcp verdict")) + test_sockmap_strp_verdict(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp v6 verdict")) + test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp partial read")) + test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp multiple packets")) + test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp dispatch")) + test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index c7f74f068e78..df27535995af 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* send a packet to trigger any potential bugs in there */ - char data[10] = {}; + char data[ETH_HLEN] = {}; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 27ffed17d4be..461ab18705d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void) __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd; struct nstoken *nstoken = NULL; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); @@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void) struct nstoken *nstoken = NULL; __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c new file mode 100644 index 000000000000..dde3d5bec515 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +int verdict_max_size = 10000; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map SEC(".maps"); + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + __u32 one = 1; + + if (skb->len > verdict_max_size) + return SK_PASS; + + return bpf_sk_redirect_map(skb, &sock_map, one, 0); +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict_pass(struct __sk_buff *skb) +{ + return SK_PASS; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser_partial(struct __sk_buff *skb) +{ + /* agreement with the test program on a 4-byte size header + * and 6-byte body. + */ + if (skb->len < 4) { + /* need more header to determine full length */ + return 0; + } + /* return full length decoded from header. + * the return value may be larger than skb->len which + * means framework must wait body coming. + */ + return 10; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c index 29eb9568633f..0a187ff725cc 100644 --- a/tools/testing/selftests/bpf/progs/verifier_array_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c @@ -713,4 +713,19 @@ unsigned int non_stack_key_lookup(void) return val->index; } +SEC("socket") +__description("doesn't reject UINT64_MAX as s64 for irrelevant maps") +__success __retval(42) +unsigned int doesnt_reject_irrelevant_maps(void) +{ + __u64 key = 0xFFFFFFFFFFFFFFFF; + struct test_val *val; + + val = bpf_map_lookup_elem(&map_hash_48b, &key); + if (val) + return val->index; + + return 42; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh index 3f45512fb512..7406c24be1ac 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Test the special cpuset v1 hotplug case where a cpuset become empty of diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 2e8a74aff543..58f3291fed12 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( real_nr_regions, min_nr_regions, max_nr_regions) + collected_nr_regions.sort() if (collected_nr_regions[0] < min_nr_regions or collected_nr_regions[-1] > max_nr_regions): print('fail %s' % test_name) @@ -109,6 +110,7 @@ def main(): attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs attrs.min_nr_regions = 3 attrs.max_nr_regions = 7 + attrs.update_us = 100000 err = kdamonds.kdamonds[0].commit() if err is not None: proc.terminate() diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py index 7d4c6bb2e3cd..57c4937aaed2 100755 --- a/tools/testing/selftests/damon/damos_quota.py +++ b/tools/testing/selftests/damon/damos_quota.py @@ -51,16 +51,19 @@ def main(): nr_quota_exceeds = scheme.stats.qt_exceeds wss_collected.sort() + nr_expected_quota_exceeds = 0 for wss in wss_collected: if wss > sz_quota: print('quota is not kept: %s > %s' % (wss, sz_quota)) print('collected samples are as below') print('\n'.join(['%d' % wss for wss in wss_collected])) exit(1) + if wss == sz_quota: + nr_expected_quota_exceeds += 1 - if nr_quota_exceeds < len(wss_collected): - print('quota is not always exceeded: %d > %d' % - (len(wss_collected), nr_quota_exceeds)) + if nr_quota_exceeds < nr_expected_quota_exceeds: + print('quota is exceeded less than expected: %d < %d' % + (nr_quota_exceeds, nr_expected_quota_exceeds)) exit(1) if __name__ == '__main__': diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7..f76e0412b564 100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -63,6 +63,9 @@ def main(): if last_effective_bytes != 0 else -1.0)) if last_effective_bytes == goal.effective_bytes: + # effective quota was already minimum that cannot be more reduced + if expect_increase is False and last_effective_bytes == 1: + continue print('efective bytes not changed: %d' % goal.effective_bytes) exit(1) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index edc56e2cc606..7bc148889ca7 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,8 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh -c_maddr="33:33:00:00:00:10" -g_maddr="33:33:00:00:02:54" +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" skip_prio() { diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 394971b25c0b..873f5219e41d 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -2,17 +2,54 @@ # SPDX-License-Identifier: GPL-2.0 import errno +import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx -from lib.py import EthtoolFamily, NlError +from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv +from lib.py import defer, ethtool, ip -def get_hds(cfg, netnl) -> None: + +def _get_hds_mode(cfg, netnl) -> str: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: raise KsftSkipEx('ring-get not supported by device') if 'tcp-data-split' not in rings: raise KsftSkipEx('tcp-data-split not supported by device') + return rings['tcp-data-split'] + + +def _xdp_onoff(cfg): + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + ip("link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, prog)) + ip("link set dev %s xdp off" % cfg.ifname) + + +def _ioctl_ringparam_modify(cfg, netnl) -> None: + """ + Helper for performing a hopefully unimportant IOCTL SET. + IOCTL does not support HDS, so it should not affect the HDS config. + """ + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + + if 'tx' not in rings: + raise KsftSkipEx('setting Tx ring size not supported') + + try: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}") + except CmdExitFailure as e: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}") + defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}") + + +def get_hds(cfg, netnl) -> None: + _get_hds_mode(cfg, netnl) + def get_hds_thresh(cfg, netnl) -> None: try: @@ -104,6 +141,103 @@ def set_hds_thresh_gt(cfg, netnl) -> None: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def ioctl(cfg, netnl) -> None: + mode1 = _get_hds_mode(cfg, netnl) + _ioctl_ringparam_modify(cfg, netnl) + mode2 = _get_hds_mode(cfg, netnl) + + ksft_eq(mode1, mode2) + + +def ioctl_set_xdp(cfg, netnl) -> None: + """ + Like set_xdp(), but we perturb the settings via the legacy ioctl. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _ioctl_ringparam_modify(cfg, netnl) + + _xdp_onoff(cfg) + + +def ioctl_enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + def main() -> None: with NetDrvEnv(__file__, queue_count=3) as cfg: ksft_run([get_hds, @@ -112,7 +246,12 @@ def main() -> None: set_hds_enable, set_hds_thresh_zero, set_hds_thresh_max, - set_hds_thresh_gt], + set_hds_thresh_gt, + set_xdp, + enabled_set_xdp, + ioctl, + ioctl_set_xdp, + ioctl_enabled_set_xdp], args=(cfg, EthtoolFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index eb83e7b48797..93f4b411b378 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -1,49 +1,219 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import os +import random, string, time from lib.py import ksft_run, ksft_exit -from lib.py import ksft_eq -from lib.py import NetDrvEpEnv +from lib.py import ksft_eq, KsftSkipEx, KsftFailEx +from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port +from lib.py import ethtool, ip +remote_ifname="" +no_sleep=False -def test_v4(cfg) -> None: +def _test_v4(cfg) -> None: cfg.require_v4() cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}") + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote) - -def test_v6(cfg) -> None: +def _test_v6(cfg) -> None: cfg.require_v6() - cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") - cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) - + cmd(f"ping -c 1 -W5 {cfg.remote_v6}") + cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote) + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}") + cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote) -def test_tcp(cfg) -> None: +def _test_tcp(cfg) -> None: cfg.require_cmd("socat", remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, exit_wait=True) as nc: wait_port_listen(port) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", shell=True, host=cfg.remote) - ksft_eq(nc.stdout.strip(), "ping") + ksft_eq(nc.stdout.strip(), test_string) + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536)) with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: wait_port_listen(port, host=cfg.remote) - cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) - ksft_eq(nc.stdout.strip(), "ping") - + cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), test_string) + +def _set_offload_checksum(cfg, netnl, on) -> None: + try: + ethtool(f" -K {cfg.ifname} rx {on} tx {on} ") + except: + return + +def _set_xdp_generic_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_generic_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_sb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) + xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if xdp_info['xdp']['mode'] != 1: + """ + If the interface doesn't support native-mode, it falls back to generic mode. + The mode value 1 is native and 2 is generic. + So it raises an exception if mode is not 1(native mode). + """ + raise KsftSkipEx('device does not support native-XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_native_mb_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) + try: + cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) + except Exception as e: + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + raise KsftSkipEx('device does not support native-multi-buffer XDP') + + if no_sleep != True: + time.sleep(10) + +def _set_xdp_offload_on(cfg) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + try: + cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True) + except Exception as e: + raise KsftSkipEx('device does not support offloaded XDP') + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + + if no_sleep != True: + time.sleep(10) + +def get_interface_info(cfg) -> None: + global remote_ifname + global no_sleep + + remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout + remote_ifname = remote_info.rstrip('\n') + if remote_ifname == "": + raise KsftFailEx('Can not get remote interface') + local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] + if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": + no_sleep=True + if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth": + no_sleep=True + +def set_interface_init(cfg) -> None: + cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) + cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) + cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default(cfg, netnl) -> None: + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + +def test_xdp_generic_sb(cfg, netnl) -> None: + _set_xdp_generic_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_generic_mb(cfg, netnl) -> None: + _set_xdp_generic_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpgeneric off" % cfg.ifname) + +def test_xdp_native_sb(cfg, netnl) -> None: + _set_xdp_native_sb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_native_mb(cfg, netnl) -> None: + _set_xdp_native_mb_on(cfg) + _set_offload_checksum(cfg, netnl, "off") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdp off" % cfg.ifname) + +def test_xdp_offload(cfg, netnl) -> None: + _set_xdp_offload_on(cfg) + _test_v4(cfg) + _test_v6(cfg) + _test_tcp(cfg) + ip("link set dev %s xdpoffload off" % cfg.ifname) def main() -> None: with NetDrvEpEnv(__file__) as cfg: - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + get_interface_info(cfg) + set_interface_init(cfg) + ksft_run([test_default, + test_xdp_generic_sb, + test_xdp_generic_mb, + test_xdp_native_sb, + test_xdp_native_mb, + test_xdp_offload], + args=(cfg, EthtoolFamily())) + set_interface_init(cfg) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee..8a518905a9f9 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -45,10 +45,9 @@ def addremove_queues(cfg, nl) -> None: netnl = EthtoolFamily() channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) - if channels['combined-count'] == 0: - rx_type = 'rx' - else: - rx_type = 'combined' + rx_type = 'rx' + if channels.get('combined-count', 0) > 0: + rx_type = 'combined' expected = curr_queues - 1 cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index dc25bcf4f9e2..73f6c6fcecab 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -7,12 +7,42 @@ echo 0 > events/enable echo > dynamic_events PLACE=$FUNCTION_FORK +PLACE2="kmem_cache_free" +PLACE3="schedule_timeout" + +# Some functions may have BPF programs attached, therefore +# count already enabled_functions before tests start +ocnt=`cat enabled_functions | wc -l` echo "f:myevent1 $PLACE" >> dynamic_events + +# Make sure the event is attached and is the only one +grep -q $PLACE enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 1)) ]; then + exit_fail +fi + echo "f:myevent2 $PLACE%return" >> dynamic_events +# It should till be the only attached function +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 1)) ]; then + exit_fail +fi + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q $PLACE2 enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 2)) ]; then + exit_fail +fi + grep -q myevent1 dynamic_events grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events test -d events/fprobes/myevent1 test -d events/fprobes/myevent2 @@ -21,6 +51,34 @@ echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events ! grep -q myevent2 dynamic_events +# should still have 2 left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 2)) ]; then + exit_fail +fi + echo > dynamic_events +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $ocnt ]; then + exit_fail +fi + +echo "f:myevent4 $PLACE" >> dynamic_events + +# Should only have one enabled +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 1)) ]; then + exit_fail +fi + +echo > dynamic_events + +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $ocnt ]; then + exit_fail +fi + clear_trace diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 0336353bd15f..2839d2612ce3 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw # $3 - target (assumed to be file); only file name will be emitted; # $4 - optional extra arg, emitted as-is, if provided. ifeq ($(V),1) -Q = msg = else -Q = @ msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 4277b983cace..f773f8f99249 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -69,6 +69,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush TEST_GEN_PROGS_x86 += x86/kvm_clock_test TEST_GEN_PROGS_x86 += x86/kvm_pv_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..447e619cf856 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -239,7 +239,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) case ITERATION_MARK_IDLE: mark_vcpu_memory_idle(vm, vcpu_args); break; - }; + } vcpu_last_completed_iteration[vcpu_idx] = current_iteration; } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index aacf80f57439..23593d9eeba9 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -31,15 +31,18 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 - /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ #define TEST_HOST_LOOP_N 32UL /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* + * Ensure the vCPU is able to perform a reasonable number of writes in each + * iteration to provide a lower bound on coverage. + */ +#define TEST_MIN_WRITES_PER_ITERATION 0x100 + /* Dirty bitmaps are always little endian, so we need to swap on big endian */ #if defined(__s390x__) # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) @@ -75,6 +78,8 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; static uint64_t iteration; +static uint64_t nr_writes; +static bool vcpu_stop; /* * Guest physical memory offset of the testing memory slot. @@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static void guest_code(void) { uint64_t addr; - int i; + +#ifdef __s390x__ + uint64_t i; /* * On s390x, all pages of a 1M segment are initially marked as dirty @@ -107,16 +114,19 @@ static void guest_code(void) for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } +#endif while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + while (!READ_ONCE(vcpu_stop)) { addr = guest_test_virt_mem; addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + nr_writes++; } GUEST_SYNC(1); @@ -133,25 +143,18 @@ static uint64_t host_num_pages; /* For statistics only */ static uint64_t host_dirty_count; static uint64_t host_clear_count; -static uint64_t host_track_next_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; static sem_t sem_vcpu_cont; -/* - * This is only set by main thread, and only cleared by vcpu thread. It is - * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC - * is the only place that we'll guarantee both "dirty bit" and "dirty data" - * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted - * after setting dirty bit but before the data is written. - */ -static atomic_t vcpu_sync_stop_requested; + /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; + /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full; * dirty gfn we've collected, so that if a mismatch of data found later in the * verifying process, we let it pass. */ -static uint64_t dirty_ring_last_page; +static uint64_t dirty_ring_last_page = -1ULL; + +/* + * In addition to the above, it is possible (especially if this + * test is run nested) for the above scenario to repeat multiple times: + * + * The following can happen: + * + * - L1 vCPU: Memory write is logged to PML but not committed. + * + * - L1 test thread: Ignores the write because its last dirty ring entry + * Resets the dirty ring which: + * - Resets the A/D bits in EPT + * - Issues tlb flush (invept), which is intercepted by L0 + * + * - L0: frees the whole nested ept mmu root as the response to invept, + * and thus ensures that when memory write is retried, it will fault again + * + * - L1 vCPU: Same memory write is logged to the PML but not committed again. + * + * - L1 test thread: Ignores the write because its last dirty ring entry (again) + * Resets the dirty ring which: + * - Resets the A/D bits in EPT (again) + * - Issues tlb flush (again) which is intercepted by L0 + * + * ... + * + * N times + * + * - L1 vCPU: Memory write is logged in the PML and then committed. + * Lots of other memory writes are logged and committed. + * ... + * + * - L1 test thread: Sees the memory write along with other memory writes + * in the dirty ring, and since the write is usually not + * the last entry in the dirty-ring and has a very outdated + * iteration, the test fails. + * + * + * Note that this is only possible when the write was the last log entry + * write during iteration N-1, thus remember last iteration last log entry + * and also don't fail when it is reported in the next iteration, together with + * an outdated iteration count. + */ +static uint64_t dirty_ring_prev_iteration_last_page; enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ @@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; -static void vcpu_kick(void) -{ - pthread_kill(vcpu_thread, SIG_IPI); -} - -/* - * In our test we do signal tricks, let's use a better version of - * sem_wait to avoid signal interrupts - */ -static void sem_wait_until(sem_t *sem) -{ - int ret; - - do - ret = sem_wait(sem); - while (ret == -1 && errno == EINTR); -} - static bool clear_log_supported(void) { return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); @@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Should only be called after a GUEST_SYNC */ static void vcpu_handle_sync_stop(void) { - if (atomic_read(&vcpu_sync_stop_requested)) { - /* It means main thread is sleeping waiting */ - atomic_set(&vcpu_sync_stop_requested, false); + if (READ_ONCE(vcpu_stop)) { sem_post(&sem_vcpu_stop); - sem_wait_until(&sem_vcpu_cont); + sem_wait(&sem_vcpu_cont); } } -static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void default_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), - "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); @@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, "%u != %u", cur->slot, slot); TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); - //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); @@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, return count; } -static void dirty_ring_wait_vcpu(void) -{ - /* This makes sure that hardware PML cache flushed */ - vcpu_kick(); - sem_wait_until(&sem_vcpu_stop); -} - -static void dirty_ring_continue_vcpu(void) -{ - pr_info("Notifying vcpu to continue\n"); - sem_post(&sem_vcpu_cont); -} - static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx) { - uint32_t count = 0, cleared; - bool continued_vcpu = false; - - dirty_ring_wait_vcpu(); - - if (!dirty_ring_vcpu_ring_full) { - /* - * This is not a ring-full event, it's safe to allow - * vcpu to continue - */ - dirty_ring_continue_vcpu(); - continued_vcpu = true; - } + uint32_t count, cleared; /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), @@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - - if (!continued_vcpu) { - TEST_ASSERT(dirty_ring_vcpu_ring_full, - "Didn't continue vcpu even without ring full"); - dirty_ring_continue_vcpu(); - } - - pr_info("Iteration %ld collected %u pages\n", iteration, count); } -static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; /* A ucall-sync or ring-full event is allowed */ if (get_ucall(vcpu, NULL) == UCALL_SYNC) { - /* We should allow this to continue */ - ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || - (ret == -1 && err == EINTR)) { - /* Update the flag first before pause */ - WRITE_ONCE(dirty_ring_vcpu_ring_full, - run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&sem_vcpu_stop); - pr_info("vcpu stops because %s...\n", - dirty_ring_vcpu_ring_full ? - "dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&sem_vcpu_cont); - pr_info("vcpu continues now.\n"); + vcpu_handle_sync_stop(); + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + WRITE_ONCE(dirty_ring_vcpu_ring_full, true); + vcpu_handle_sync_stop(); } else { TEST_ASSERT(false, "Invalid guest sync status: " "exit_reason=%s", @@ -426,7 +407,7 @@ struct log_mode { void *bitmap, uint32_t num_pages, uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); + void (*after_vcpu_run)(struct kvm_vcpu *vcpu); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -449,15 +430,6 @@ struct log_mode { }, }; -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -static unsigned long *host_bmap_track; - static void log_modes_dump(void) { int i; @@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } -static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vcpu, ret, err); + mode->after_vcpu_run(vcpu); } static void *vcpu_worker(void *data) { - int ret; struct kvm_vcpu *vcpu = data; - uint64_t pages_count = 0; - struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) - + sizeof(sigset_t)); - sigset_t *sigset = (sigset_t *) &sigmask->sigset; - /* - * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the - * ioctl to return with -EINTR, but it is still pending and we need - * to accept it with the sigwait. - */ - sigmask->len = 8; - pthread_sigmask(0, NULL, sigset); - sigdelset(sigset, SIG_IPI); - vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask); - - sigemptyset(sigset); - sigaddset(sigset, SIG_IPI); + sem_wait(&sem_vcpu_cont); while (!READ_ONCE(host_quit)) { - /* Clear any existing kick signals */ - pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = __vcpu_run(vcpu); - if (ret == -1 && errno == EINTR) { - int sig = -1; - sigwait(sigset, &sig); - assert(sig == SIG_IPI); - } - log_mode_after_vcpu_run(vcpu, ret, errno); + vcpu_run(vcpu); + log_mode_after_vcpu_run(vcpu); } - pr_info("Dirtied %"PRIu64" pages\n", pages_count); - return NULL; } -static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { + uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0; uint64_t step = vm_num_host_pages(mode, 1); - uint64_t page; - uint64_t *value_ptr; - uint64_t min_iter = 0; for (page = 0; page < host_num_pages; page += step) { - value_ptr = host_test_mem + page * host_page_size; - - /* If this is a special page that we were tracking... */ - if (__test_and_clear_bit_le(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit_le(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } + uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size); + bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); - if (__test_and_clear_bit_le(page, bmap)) { - bool matched; - - host_dirty_count++; + /* + * Ensure both bitmaps are cleared, as a page can be written + * multiple times per iteration, i.e. can show up in both + * bitmaps, and the dirty ring is additive, i.e. doesn't purge + * bitmap entries from previous collections. + */ + if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) { + nr_dirty_pages++; /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. + * If the page is dirty, the value written to memory + * should be the current iteration number. */ - matched = (*value_ptr == iteration || - *value_ptr == iteration - 1); - - if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { - if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { - /* - * Short answer: this case is special - * only for dirty ring test where the - * page is the last page before a kvm - * dirty ring full in iteration N-2. - * - * Long answer: Assuming ring size R, - * one possible condition is: - * - * main thr vcpu thr - * -------- -------- - * iter=1 - * write 1 to page 0~(R-1) - * full, vmexit - * collect 0~(R-1) - * kick vcpu - * write 1 to (R-1)~(2R-2) - * full, vmexit - * iter=2 - * collect (R-1)~(2R-2) - * kick vcpu - * write 1 to (2R-2) - * (NOTE!!! "1" cached in cpu reg) - * write 2 to (2R-1)~(3R-3) - * full, vmexit - * iter=3 - * collect (2R-2)~(3R-3) - * (here if we read value on page - * "2R-2" is 1, while iter=3!!!) - * - * This however can only happen once per iteration. - */ - min_iter = iteration - 1; + if (val == iteration) + continue; + + if (host_log_mode == LOG_MODE_DIRTY_RING) { + /* + * The last page in the ring from previous + * iteration can be written with the value + * from the previous iteration, as the value to + * be written may be cached in a CPU register. + */ + if (page == dirty_ring_prev_iteration_last_page && + val == iteration - 1) continue; - } else if (page == dirty_ring_last_page) { - /* - * Please refer to comments in - * dirty_ring_last_page. - */ + + /* + * Any value from a previous iteration is legal + * for the last entry, as the write may not yet + * have retired, i.e. the page may hold whatever + * it had before this iteration started. + */ + if (page == dirty_ring_last_page && + val < iteration) continue; - } + } else if (!val && iteration == 1 && bmap0_dirty) { + /* + * When testing get+clear, the dirty bitmap + * starts with all bits set, and so the first + * iteration can observe a "dirty" page that + * was never written, but only in the first + * bitmap (collecting the bitmap also clears + * all dirty pages). + */ + continue; } - TEST_ASSERT(matched, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } else { - host_clear_count++; + nr_clean_pages++; /* * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". + * value smaller than the iteration number. */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - __set_bit_le(page, host_bmap_track); - } + TEST_ASSERT(val < iteration, + "Clear page %lu value (%lu) >= iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } } + + pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n", + iteration, nr_dirty_pages, nr_clean_pages, nr_writes); + + host_dirty_count += nr_dirty_pages; + host_clear_count += nr_clean_pages; } static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, @@ -688,7 +599,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - unsigned long *bmap; + unsigned long *bmap[2]; uint32_t ring_buf_idx = 0; int sem_val; @@ -731,12 +642,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); + + /* + * The workaround in guest_code() to write all pages prior to the first + * iteration isn't compatible with the dirty ring, as the dirty ring + * support relies on the vCPU to actually stop when vcpu_stop is set so + * that the vCPU doesn't hang waiting for the dirty ring to be emptied. + */ + TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING, + "Test needs to be updated to support s390 dirty ring"); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - bmap = bitmap_zalloc(host_num_pages); - host_bmap_track = bitmap_zalloc(host_num_pages); + bmap[0] = bitmap_zalloc(host_num_pages); + bmap[1] = bitmap_zalloc(host_num_pages); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -757,14 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); - /* Start the iterations */ - iteration = 1; - sync_global_to_guest(vm, iteration); - WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; - host_track_next_count = 0; - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + WRITE_ONCE(host_quit, false); /* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. @@ -776,21 +691,95 @@ static void run_test(enum vm_guest_mode mode, void *arg) sem_getvalue(&sem_vcpu_cont, &sem_val); TEST_ASSERT_EQ(sem_val, 0); + TEST_ASSERT_EQ(vcpu_stop, false); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - while (iteration < p->iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(p->interval * 1000); - log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages, - &ring_buf_idx); + for (iteration = 1; iteration <= p->iterations; iteration++) { + unsigned long i; + + sync_global_to_guest(vm, iteration); + + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + + dirty_ring_prev_iteration_last_page = dirty_ring_last_page; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + sem_post(&sem_vcpu_cont); + + /* + * Let the vCPU run beyond the configured interval until it has + * performed the minimum number of writes. This verifies the + * guest is making forward progress, e.g. isn't stuck because + * of a KVM bug, and puts a firm floor on test coverage. + */ + for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { + /* + * Sleep in 1ms chunks to keep the interval math simple + * and so that the test doesn't run too far beyond the + * specified interval. + */ + usleep(1000); + + sync_global_from_guest(vm, nr_writes); + + /* + * Reap dirty pages while the guest is running so that + * dirty ring full events are resolved, i.e. so that a + * larger interval doesn't always end up with a vCPU + * that's effectively blocked. Collecting while the + * guest is running also verifies KVM doesn't lose any + * state. + * + * For bitmap modes, KVM overwrites the entire bitmap, + * i.e. collecting the bitmaps is destructive. Collect + * the bitmap only on the first pass, otherwise this + * test would lose track of dirty pages. + */ + if (i && host_log_mode != LOG_MODE_DIRTY_RING) + continue; + + /* + * For the dirty ring, empty the ring on subsequent + * passes only if the ring was filled at least once, + * to verify KVM's handling of a full ring (emptying + * the ring on every pass would make it unlikely the + * vCPU would ever fill the fing). + */ + if (i && !READ_ONCE(dirty_ring_vcpu_ring_full)) + continue; + + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[0], host_num_pages, + &ring_buf_idx); + } + + /* + * Stop the vCPU prior to collecting and verifying the dirty + * log. If the vCPU is allowed to run during collection, then + * pages that are written during this iteration may be missed, + * i.e. collected in the next iteration. And if the vCPU is + * writing memory during verification, pages that this thread + * sees as clean may be written with this iteration's value. + */ + WRITE_ONCE(vcpu_stop, true); + sync_global_to_guest(vm, vcpu_stop); + sem_wait(&sem_vcpu_stop); /* - * See vcpu_sync_stop_requested definition for details on why - * we need to stop vcpu when verify data. + * Clear vcpu_stop after the vCPU thread has acknowledge the + * stop request and is waiting, i.e. is definitely not running! */ - atomic_set(&vcpu_sync_stop_requested, true); - sem_wait_until(&sem_vcpu_stop); + WRITE_ONCE(vcpu_stop, false); + sync_global_to_guest(vm, vcpu_stop); + + /* + * Sync the number of writes performed before verification, the + * info will be printed along with the dirty/clean page counts. + */ + sync_global_from_guest(vm, nr_writes); + /* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; @@ -798,32 +787,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) * the flush of the last page, and since we handle the last * page specially verification will succeed anyway. */ - assert(host_log_mode == LOG_MODE_DIRTY_RING || - atomic_read(&vcpu_sync_stop_requested) == false); + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[1], host_num_pages, + &ring_buf_idx); vm_dirty_log_verify(mode, bmap); - - /* - * Set host_quit before sem_vcpu_cont in the final iteration to - * ensure that the vCPU worker doesn't resume the guest. As - * above, the dirty ring test may stop and wait even when not - * explicitly request to do so, i.e. would hang waiting for a - * "continue" if it's allowed to resume the guest. - */ - if (++iteration == p->iterations) - WRITE_ONCE(host_quit, true); - - sem_post(&sem_vcpu_cont); - sync_global_to_guest(vm, iteration); } + WRITE_ONCE(host_quit, true); + sem_post(&sem_vcpu_cont); + pthread_join(vcpu_thread, NULL); - pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", + host_dirty_count, host_clear_count); - free(bmap); - free(host_bmap_track); + free(bmap[0]); + free(bmap[1]); kvm_vm_free(vm); } @@ -857,7 +836,6 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; - sigset_t sigset; sem_init(&sem_vcpu_stop, 0, 0); sem_init(&sem_vcpu_cont, 0, 0); @@ -908,19 +886,12 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero"); TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", p.iterations, p.interval); - srandom(time(0)); - - /* Ensure that vCPU threads start with SIG_IPI blocked. */ - sigemptyset(&sigset); - sigaddset(&sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, &sigset, NULL); - if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 4c4e5a847f67..373912464fb4 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -46,6 +46,12 @@ struct userspace_mem_region { struct hlist_node slot_node; }; +struct kvm_binary_stats { + int fd; + struct kvm_stats_header header; + struct kvm_stats_desc *desc; +}; + struct kvm_vcpu { struct list_head list; uint32_t id; @@ -55,6 +61,7 @@ struct kvm_vcpu { #ifdef __x86_64__ struct kvm_cpuid2 *cpuid; #endif + struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; uint32_t fetch_index; uint32_t dirty_gfns_count; @@ -99,10 +106,7 @@ struct kvm_vm { struct kvm_vm_arch arch; - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; + struct kvm_binary_stats stats; /* * KVM region slots. These are the default memslots used by page @@ -531,16 +535,19 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, struct kvm_stats_desc *desc, uint64_t *data, size_t max_elements); -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements); -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) -{ - uint64_t data; +#define __get_stat(stats, stat) \ +({ \ + uint64_t data; \ + \ + kvm_get_stat(stats, #stat, &data, 1); \ + data; \ +}) - __vm_get_stat(vm, stat_name, &data, 1); - return data; -} +#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat) +#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat) void vm_create_irqchip(struct kvm_vm *vm); @@ -963,6 +970,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +void kvm_set_files_rlimit(uint32_t nr_vcpus); + void kvm_pin_this_task_to_pcpu(uint32_t pcpu); void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..77d13d7920cb 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -22,7 +22,7 @@ #define msecs_to_usecs(msec) ((msec) * 1000ULL) -static inline int _no_printf(const char *format, ...) { return 0; } +static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; } #ifdef DEBUG #define pr_debug(...) printf(__VA_ARGS__) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index d60da8966772..32ab6ca7ec32 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -183,6 +183,9 @@ struct kvm_x86_cpu_feature { * Extended Leafs, a.k.a. AMD defined */ #define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23) +#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24) +#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28) #define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) #define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) #define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) @@ -197,8 +200,11 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) #define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) +#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0) +#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2) /* * KVM defined paravirt features. @@ -285,6 +291,8 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) +#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3) +#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15) #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) @@ -1244,7 +1252,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, uint64_t ign_error_code; \ uint8_t vector; \ \ - asm volatile(KVM_ASM_SAFE(insn) \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ : inputs \ : KVM_ASM_SAFE_CLOBBERS); \ @@ -1339,6 +1347,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, GUEST_ASSERT(!ret); } +/* + * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's + * intended to be a wake event arrives *after* HLT is executed. Modern CPUs, + * except for a few oddballs that KVM is unlikely to run on, block IRQs for one + * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may + * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too. + */ +static inline void safe_halt(void) +{ + asm volatile("sti; hlt"); +} + +/* + * Enable interrupts and ensure that interrupts are evaluated upon return from + * this function, i.e. execute a nop to consume the STi interrupt shadow. + */ +static inline void sti_nop(void) +{ + asm volatile ("sti; nop"); +} + +/* + * Enable interrupts for one instruction (nop), to allow the CPU to process all + * interrupts that are already pending. + */ +static inline void sti_nop_cli(void) +{ + asm volatile ("sti; nop; cli"); +} + +static inline void sti(void) +{ + asm volatile("sti"); +} + +static inline void cli(void) +{ + asm volatile ("cli"); +} + void __vm_xsave_require_permission(uint64_t xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index c78f34699f73..c5310736ed06 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include "test_util.h" @@ -39,36 +38,11 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - /* - * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + - * an arbitrary number for everything else. - */ - int nr_fds_wanted = kvm_max_vcpus + 100; - struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); - /* - * Check that we're allowed to open nr_fds_wanted file descriptors and - * try raising the limits if needed. - */ - TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); - - if (rl.rlim_cur < nr_fds_wanted) { - rl.rlim_cur = nr_fds_wanted; - if (rl.rlim_max < nr_fds_wanted) { - int old_rlim_max = rl.rlim_max; - rl.rlim_max = nr_fds_wanted; - - int r = setrlimit(RLIMIT_NOFILE, &rl); - __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", - old_rlim_max, nr_fds_wanted); - } else { - TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); - } - } + kvm_set_files_rlimit(kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 33fefeb3ca44..279ad8946040 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -12,6 +12,7 @@ #include <assert.h> #include <sched.h> #include <sys/mman.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -196,6 +197,11 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } const char *vm_guest_mode_string(uint32_t i) @@ -406,6 +412,38 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } +void kvm_set_files_rlimit(uint32_t nr_vcpus) +{ + /* + * Each vCPU will open two file descriptors: the vCPU itself and the + * vCPU's binary stats file descriptor. Add an arbitrary amount of + * buffer for all other files a test may open. + */ + int nr_fds_wanted = nr_vcpus * 2 + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { @@ -415,6 +453,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, struct kvm_vm *vm; int i; + kvm_set_files_rlimit(nr_runnable_vcpus); + pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, vm_guest_mode_string(shape.mode), shape.type, nr_pages); @@ -657,6 +697,23 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } +static void kvm_stats_release(struct kvm_binary_stats *stats) +{ + int ret; + + if (stats->fd < 0) + return; + + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } + + ret = close(stats->fd); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + stats->fd = -1; +} + __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -690,6 +747,8 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) ret = close(vcpu->fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_stats_release(&vcpu->stats); + list_del(&vcpu->list); vcpu_arch_free(vcpu); @@ -709,6 +768,9 @@ void kvm_vm_release(struct kvm_vm *vmp) ret = close(vmp->kvm_fd); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + + /* Free cached stats metadata and close FD */ + kvm_stats_release(&vmp->stats); } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -748,12 +810,6 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region); @@ -1286,6 +1342,11 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vcpu->run != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vcpu->stats.fd = vcpu_get_stats_fd(vcpu); + else + vcpu->stats.fd = -1; + /* Add to linked-list of VCPUs. */ list_add(&vcpu->list, &vm->vcpus); @@ -2198,46 +2259,31 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, desc->name, size, ret); } -/* - * Read the data of the named stat - * - * Input Args: - * vm - the VM for which the stat should be read - * stat_name - the name of the stat to read - * max_elements - the maximum number of 8-byte values to read into data - * - * Output Args: - * data - the buffer into which stat data should be read - * - * Read the data values of a specified stat from the binary stats interface. - */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements) +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + uint64_t *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_fd) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); - - break; + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); + return; } + + TEST_FAIL("Unable to find stat '%s'", name); } __weak void kvm_arch_vm_post_create(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 7c9de8414462..5bde176cedd5 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index d9c76b4c0d88..6a437d2be9fa 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -18,6 +18,7 @@ #include "ucall_common.h" static bool mprotect_ro_done; +static bool all_vcpus_hit_ro_fault; static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) { @@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) /* * Write to the region while mprotect(PROT_READ) is underway. Keep - * looping until the memory is guaranteed to be read-only, otherwise - * vCPUs may complete their writes and advance to the next stage - * prematurely. + * looping until the memory is guaranteed to be read-only and a fault + * has occurred, otherwise vCPUs may complete their writes and advance + * to the next stage prematurely. * * For architectures that support skipping the faulting instruction, * generate the store via inline assembly to ensure the exact length @@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #else vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); #endif - } while (!READ_ONCE(mprotect_ro_done)); + } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); /* * Only architectures that write the entire range can explicitly sync, @@ -81,6 +82,7 @@ struct vcpu_info { static int nr_vcpus; static atomic_t rendezvous; +static atomic_t nr_ro_faults; static void rendezvous_with_boss(void) { @@ -148,12 +150,16 @@ static void *vcpu_worker(void *data) * be stuck on the faulting instruction for other architectures. Go to * stage 3 without a rendezvous */ - do { - r = _vcpu_run(vcpu); - } while (!r); + r = _vcpu_run(vcpu); TEST_ASSERT(r == -1 && errno == EFAULT, "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno); + atomic_inc(&nr_ro_faults); + if (atomic_read(&nr_ro_faults) == nr_vcpus) { + WRITE_ONCE(all_vcpus_hit_ro_fault, true); + sync_global_to_guest(vm, all_vcpus_hit_ro_fault); + } + #if defined(__x86_64__) || defined(__aarch64__) /* * Verify *all* writes from the guest hit EFAULT due to the VMA now @@ -378,7 +384,6 @@ int main(int argc, char *argv[]) rendezvous_with_vcpus(&time_run2, "run 2"); mprotect(mem, slot_size, PROT_READ); - usleep(10); mprotect_ro_done = true; sync_global_to_guest(vm, mprotect_ro_done); diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..b0d2b04a7ff2 100644 --- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -41,9 +41,9 @@ struct kvm_page_stats { static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) { - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->pages_4k = vm_get_stat(vm, pages_4k); + stats->pages_2m = vm_get_stat(vm, pages_2m); + stats->pages_1g = vm_get_stat(vm, pages_1g); stats->hugepages = stats->pages_2m + stats->pages_1g; pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4f5881d4ef66..4e920705681a 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -41,13 +41,19 @@ static bool smt_possible(void) return res; } -static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_expected) +static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { + const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; + const struct kvm_cpuid2 *hv_cpuid_entries; int i; int nent_expected = 10; u32 test_val; + if (vcpu) + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + else + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); + TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " (returned %d)", @@ -80,12 +86,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, entry->eax, evmcs_expected ); break; + case 0x40000003: + TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)), + "\"Direct\" Synthetic Timers should require in-kernel APIC"); + break; case 0x40000004: test_val = entry->eax & (1UL << 18); TEST_ASSERT(!!test_val == !smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); + + TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)), + "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC"); break; case 0x4000000A: TEST_ASSERT(entry->eax & (1UL << 19), @@ -109,9 +122,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, * entry->edx); */ } + + /* + * Note, the CPUID array returned by the system-scoped helper is a one- + * time allocation, i.e. must not be freed. + */ + if (vcpu) + free((void *)hv_cpuid_entries); } -void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; @@ -129,19 +149,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int main(int argc, char *argv[]) { struct kvm_vm *vm; - const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + /* Test the vCPU ioctl without an in-kernel local APIC. */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_hv_cpuid(vcpu, false); + kvm_vm_free(vm); /* Test vCPU ioctl version */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); test_hv_cpuid_e2big(vm, vcpu); - - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, false); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, false); if (!kvm_cpu_has(X86_FEATURE_VMX) || !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { @@ -149,9 +170,7 @@ int main(int argc, char *argv[]) goto do_sys; } vcpu_enable_evmcs(vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, true); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, true); do_sys: /* Test system ioctl version */ @@ -161,9 +180,7 @@ do_sys: } test_hv_cpuid_e2big(vm, NULL); - - hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); + test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX)); out: kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 22c0c124582f..2b5b4bc6ef7e 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* Signal sender vCPU we're ready */ ipis_rcvd[vcpu_id] = (u64)-1; - for (;;) - asm volatile("sti; hlt; cli"); + for (;;) { + safe_halt(); + cli(); + } } static void guest_ipi_handler(struct ex_regs *regs) diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c new file mode 100644 index 000000000000..abc824dba04f --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +enum { + SVM_F, + VMX_F, + NR_VIRTUALIZATION_FLAVORS, +}; + +struct emulated_instruction { + const char name[32]; + uint8_t opcode[15]; + uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS]; +}; + +static struct emulated_instruction instructions[] = { + { + .name = "pause", + .opcode = { 0xf3, 0x90 }, + .exit_reason = { SVM_EXIT_PAUSE, + EXIT_REASON_PAUSE_INSTRUCTION, } + }, + { + .name = "hlt", + .opcode = { 0xf4 }, + .exit_reason = { SVM_EXIT_HLT, + EXIT_REASON_HLT, } + }, +}; + +static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ +static uint8_t l2_guest_code[sizeof(kvm_fep) + 15]; +static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; + +static uint32_t get_instruction_length(struct emulated_instruction *insn) +{ + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++) + ; + + return i; +} + +static void guest_code(void *test_data) +{ + int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F; + int i; + + memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep)); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, NULL, NULL); + vmcb->save.idtr.limit = 0; + vmcb->save.rip = (u64)l2_guest_code; + + vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) | + BIT_ULL(INTERCEPT_PAUSE) | + BIT_ULL(INTERCEPT_HLT); + vmcb->control.intercept_exceptions = 0; + } else { + GUEST_ASSERT(prepare_for_vmx_operation(test_data)); + GUEST_ASSERT(load_vmcs(test_data)); + + prepare_vmcs(test_data, NULL, NULL); + GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0)); + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0)); + + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_PAUSE_EXITING | + CPU_BASED_HLT_EXITING); + } + + for (i = 0; i < ARRAY_SIZE(instructions); i++) { + struct emulated_instruction *insn = &instructions[i]; + uint32_t insn_len = get_instruction_length(insn); + uint32_t exit_insn_len; + u32 exit_reason; + + /* + * Copy the target instruction to the L2 code stream, and fill + * the remaining bytes with INT3s so that a missed intercept + * results in a consistent failure mode (SHUTDOWN). + */ + memcpy(l2_instruction, insn->opcode, insn_len); + memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + run_guest(vmcb, svm->vmcb_gpa); + exit_reason = vmcb->control.exit_code; + exit_insn_len = vmcb->control.next_rip - vmcb->save.rip; + GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction); + } else { + GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0); + exit_reason = vmreadz(VM_EXIT_REASON); + exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN); + GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction); + } + + __GUEST_ASSERT(exit_reason == insn->exit_reason[f], + "Wanted exit_reason '0x%x' for '%s', got '0x%x'", + insn->exit_reason[f], insn->name, exit_reason); + + __GUEST_ASSERT(exit_insn_len == insn_len, + "Wanted insn_len '%u' for '%s', got '%u'", + insn_len, insn->name, exit_insn_len); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3eb0313ffa39..3641a42934ac 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector)); GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code); + GUEST_ASSERT(!ctrl->int_state); } static void l1_svm_code(struct svm_test_data *svm) @@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector); GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code); + GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO)); } static void l1_vmx_code(struct vmx_pages *vmx) diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..c0d84827f736 100644 --- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) { int actual_pages_2m; - actual_pages_2m = vm_get_stat(vm, "pages_2m"); + actual_pages_2m = vm_get_stat(vm, pages_2m); TEST_ASSERT(actual_pages_2m == expected_pages_2m, "Unexpected 2m page count. Expected %d, got %d", @@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) { int actual_splits; - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + actual_splits = vm_get_stat(vm, nx_lpage_splits); TEST_ASSERT(actual_splits == expected_splits, "Unexpected NX huge page split count. Expected %d, got %d", diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 698cb36989db..8aaaf25b6111 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -17,7 +17,7 @@ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, * 1 LOOP. */ -#define NUM_INSNS_PER_LOOP 3 +#define NUM_INSNS_PER_LOOP 4 /* * Number of "extra" instructions that will be counted, i.e. the number of @@ -29,10 +29,59 @@ /* Total number of instructions retired within the measured section. */ #define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) +/* Track which architectural events are supported by hardware. */ +static uint32_t hardware_pmu_arch_events; static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; +#define X86_PMU_FEATURE_NULL \ +({ \ + struct kvm_x86_pmu_feature feature = {}; \ + \ + feature; \ +}) + +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) +{ + return !(*(u64 *)&event); +} + +struct kvm_intel_pmu_event { + struct kvm_x86_pmu_feature gp_event; + struct kvm_x86_pmu_feature fixed_event; +}; + +/* + * Wrap the array to appease the compiler, as the macros used to construct each + * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the + * compiler often thinks the feature definitions aren't compile-time constants. + */ +static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx) +{ + const struct kvm_intel_pmu_event __intel_event_to_feature[] = { + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, + /* + * Note, the fixed counter for reference cycles is NOT the same as the + * general purpose architectural event. The fixed counter explicitly + * counts at the same frequency as the TSC, whereas the GP event counts + * at a fixed, but uarch specific, frequency. Bundle them here for + * simplicity. + */ + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + }; + + kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); + + return __intel_event_to_feature[idx]; +} + static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, uint8_t pmu_version, @@ -42,6 +91,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, vm = vm_create_with_one_vcpu(vcpu, guest_code); sync_global_to_guest(vm, kvm_pmu_version); + sync_global_to_guest(vm, hardware_pmu_arch_events); /* * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling @@ -98,14 +148,12 @@ static uint8_t guest_get_pmu_version(void) * Sanity check that in all cases, the event doesn't count when it's disabled, * and that KVM correctly emulates the write of an arbitrary value. */ -static void guest_assert_event_count(uint8_t idx, - struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr) +static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr) { uint64_t count; count = _rdpmc(pmc); - if (!this_pmu_has(event)) + if (!(hardware_pmu_arch_events & BIT(idx))) goto sanity_checks; switch (idx) { @@ -126,7 +174,9 @@ static void guest_assert_event_count(uint8_t idx, GUEST_ASSERT_NE(count, 0); break; case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: - GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, + "Expected top-down slots >= %u, got count = %lu", + NUM_INSNS_RETIRED, count); break; default: break; @@ -162,75 +212,42 @@ do { \ "1:\n\t" \ clflush "\n\t" \ "mfence\n\t" \ + "mov %[m], %%eax\n\t" \ FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ "wrmsr\n\t" \ :: "a"((uint32_t)_value), "d"(_value >> 32), \ - "c"(_msr), "D"(_msr) \ + "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ ); \ } while (0) -#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ do { \ - wrmsr(pmc_msr, 0); \ + wrmsr(_pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \ - guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \ + guest_assert_event_count(_idx, _pmc, _pmc_msr); \ } while (0) -static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr, +static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr, uint32_t ctrl_msr, uint64_t ctrl_msr_value) { - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); if (is_forced_emulation_enabled) - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); -} - -#define X86_PMU_FEATURE_NULL \ -({ \ - struct kvm_x86_pmu_feature feature = {}; \ - \ - feature; \ -}) - -static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) -{ - return !(*(u64 *)&event); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); } static void guest_test_arch_event(uint8_t idx) { - const struct { - struct kvm_x86_pmu_feature gp_event; - struct kvm_x86_pmu_feature fixed_event; - } intel_event_to_feature[] = { - [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, - [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, - /* - * Note, the fixed counter for reference cycles is NOT the same - * as the general purpose architectural event. The fixed counter - * explicitly counts at the same frequency as the TSC, whereas - * the GP event counts at a fixed, but uarch specific, frequency. - * Bundle them here for simplicity. - */ - [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, - [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, - }; - uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); uint32_t pmu_version = guest_get_pmu_version(); /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ @@ -248,7 +265,7 @@ static void guest_test_arch_event(uint8_t idx) else base_pmc_msr = MSR_IA32_PERFCTR0; - gp_event = intel_event_to_feature[idx].gp_event; + gp_event = intel_event_to_feature(idx).gp_event; GUEST_ASSERT_EQ(idx, gp_event.f.bit); GUEST_ASSERT(nr_gp_counters); @@ -262,14 +279,14 @@ static void guest_test_arch_event(uint8_t idx) if (guest_has_perf_global_ctrl) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); - __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i, + __guest_test_arch_event(idx, i, base_pmc_msr + i, MSR_P6_EVNTSEL0 + i, eventsel); } if (!guest_has_perf_global_ctrl) return; - fixed_event = intel_event_to_feature[idx].fixed_event; + fixed_event = intel_event_to_feature(idx).fixed_event; if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) return; @@ -277,7 +294,7 @@ static void guest_test_arch_event(uint8_t idx) wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED, + __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED, MSR_CORE_PERF_FIXED_CTR0 + i, MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); @@ -331,9 +348,9 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ expect_gp ? "#GP" : "no fault", msr, vector) \ #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ - __GUEST_ASSERT(val == expected_val, \ + __GUEST_ASSERT(val == expected, \ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ - msr, expected_val, val); + msr, expected, val); static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, uint64_t expected_val) @@ -545,7 +562,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, static void test_intel_counters(void) { - uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); @@ -567,18 +583,26 @@ static void test_intel_counters(void) /* * Detect the existence of events that aren't supported by selftests. - * This will (obviously) fail any time the kernel adds support for a - * new event, but it's worth paying that price to keep the test fresh. + * This will (obviously) fail any time hardware adds support for a new + * event, but it's worth paying that price to keep the test fresh. */ - TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS, + TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS, "New architectural event(s) detected; please update this test (length = %u, mask = %x)", - nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); + this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH), + this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); /* - * Force iterating over known arch events regardless of whether or not - * KVM/hardware supports a given event. + * Iterate over known arch events irrespective of KVM/hardware support + * to verify that KVM doesn't reject programming of events just because + * the *architectural* encoding is unsupported. Track which events are + * supported in hardware; the guest side will validate supported events + * count correctly, even if *enumeration* of the event is unsupported + * by KVM and/or isn't exposed to the guest. */ - nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS); + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) { + if (this_pmu_has(intel_event_to_feature(i).gp_event)) + hardware_pmu_arch_events |= BIT(i); + } for (v = 0; v <= max_pmu_version; v++) { for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { @@ -594,8 +618,8 @@ static void test_intel_counters(void) * vector length. */ if (v == pmu_version) { - for (k = 1; k < (BIT(nr_arch_events) - 1); k++) - test_arch_events(v, perf_caps[i], nr_arch_events, k); + for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++) + test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k); } /* * Test single bits for all PMU version and lengths up @@ -604,11 +628,11 @@ static void test_intel_counters(void) * host length). Explicitly test a mask of '0' and all * ones i.e. all events being available and unavailable. */ - for (j = 0; j <= nr_arch_events + 1; j++) { + for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { test_arch_events(v, perf_caps[i], j, 0); test_arch_events(v, perf_caps[i], j, 0xff); - for (k = 0; k < nr_arch_events; k++) + for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++) test_arch_events(v, perf_caps[i], j, BIT(k)); } diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index a1a688e75266..d97816dc476a 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) bool bad = false; for (i = 0; i < 4095; i++) { if (from_host[i] != from_guest[i]) { - printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + printf("mismatch at %u | %02hhx %02hhx\n", + i, from_host[i], from_guest[i]); bad = true; } } diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 916e04248fbb..917b6066cfc1 100644 --- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm) x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); GUEST_ASSERT(vintr_irq_called); GUEST_ASSERT(intr_irq_called); diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 57f157c06b39..1e5e564523b3 100644 --- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void) wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); /* Enables interrupt in guest. */ - asm volatile("sti"); + sti(); /* Let user space inject the first UCNA */ GUEST_SYNC(SYNC_FIRST_UCNA); diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index a76078a08ff8..35cb9de54a82 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data) data->halter_tpr = xapic_read_reg(APIC_TASKPRI); data->halter_ppr = xapic_read_reg(APIC_PROCPRI); data->hlt_count++; - asm volatile("sti; hlt; cli"); + safe_halt(); + cli(); data->wake_count++; } } @@ -465,6 +466,19 @@ int main(int argc, char *argv[]) cancel_join_vcpu_thread(threads[0], params[0].vcpu); cancel_join_vcpu_thread(threads[1], params[1].vcpu); + /* + * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT, + * then the number of HLT exits may be less than the number of HLTs + * that were executed, as Idle HLT elides the exit if the vCPU has an + * unmasked, pending IRQ (or NMI). + */ + if (this_cpu_has(X86_FEATURE_IDLE_HLT)) + TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), + "HLT insns = %lu, HLT exits = %lu", + data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + else + TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + fprintf(stderr, "Test successful after running for %d seconds.\n" "Sending vCPU sent %lu IPIs to halting vCPU\n" diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 88bcca188799..fdebff1165c7 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -18,7 +18,7 @@ struct xapic_vcpu { static void xapic_guest_code(void) { - asm volatile("cli"); + cli(); xapic_enable(); @@ -38,7 +38,7 @@ static void xapic_guest_code(void) static void x2apic_guest_code(void) { - asm volatile("cli"); + cli(); x2apic_enable(); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index a59b3c799bb2..287829f850f7 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -191,10 +191,7 @@ static void guest_code(void) struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; int i; - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); /* Trigger an interrupt injection */ GUEST_SYNC(TEST_INJECT_VECTOR); diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore index 470203a7cd73..335b2b1a3463 100644 --- a/tools/testing/selftests/landlock/.gitignore +++ b/tools/testing/selftests/landlock/.gitignore @@ -1,2 +1,4 @@ /*_test +/sandbox-and-launch /true +/wait-pipe diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index a604ea5d8297..6064c9ac0532 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -207,6 +207,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) struct protocol_variant { int domain; int type; + int protocol; }; struct service_fixture { diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 29af19c4e9f9..425de4c20271 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,8 +1,11 @@ +CONFIG_AF_UNIX_OOB=y CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y CONFIG_KEYS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 4e0aeb53b225..d9de0ee49ebc 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata) clear_ambient_cap(_metadata, CAP_NET_ADMIN); } +static bool prot_is_tcp(const struct protocol_variant *const prot) +{ + return (prot->domain == AF_INET || prot->domain == AF_INET6) && + prot->type == SOCK_STREAM && + (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP); +} + static bool is_restricted(const struct protocol_variant *const prot, const enum sandbox_type sandbox) { - switch (prot->domain) { - case AF_INET: - case AF_INET6: - switch (prot->type) { - case SOCK_STREAM: - return sandbox == TCP_SANDBOX; - } - break; - } + if (sandbox == TCP_SANDBOX) + return prot_is_tcp(prot); return false; } @@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv) int ret; ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC, - 0); + srv->protocol.protocol); if (ret < 0) return -errno; return ret; @@ -290,22 +290,70 @@ FIXTURE_TEARDOWN(protocol) } /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, }, }; @@ -350,22 +398,70 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) { }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, }, }; diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index ada9156cc497..c463d1c09c9b 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <sys/mman.h> #include <errno.h> #include <fcntl.h> /* Definition of O_* constants */ diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 66b4e111b5a2..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include <string.h> #include <stdbool.h> #include <stdint.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <errno.h> #include <fcntl.h> #include <sys/mman.h> @@ -369,6 +369,7 @@ unmap: munmap(map, size); } +#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -429,6 +430,7 @@ close_uffd: unmap: munmap(map, size); } +#endif /* Verify that KSM can be enabled / queried with prctl. */ static void test_prctl(void) @@ -684,7 +686,9 @@ int main(int argc, char **argv) exit(test_child_ksm()); } +#ifdef __NR_userfaultfd tests++; +#endif ksft_print_header(); ksft_set_plan(tests); @@ -696,7 +700,9 @@ int main(int argc, char **argv) test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); +#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); +#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 74c911aa3aea..9a0597310a76 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <string.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <errno.h> #include <stdio.h> #include <fcntl.h> @@ -28,6 +28,8 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) +#ifdef __NR_memfd_secret + #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -332,3 +334,13 @@ int main(int argc, char *argv[]) ksft_finished(); } + +#else /* __NR_memfd_secret */ + +int main(int argc, char *argv[]) +{ + printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index af2fce496912..09feeb453646 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include <fcntl.h> #include <signal.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <string.h> #include <errno.h> #include <stdlib.h> @@ -265,6 +265,7 @@ munmap: munmap(mmap_mem, mmap_size); } +#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -322,6 +323,7 @@ munmap: munmap(dst, pagesize); free(src); } +#endif /* __NR_userfaultfd */ int main(void) { @@ -334,7 +336,9 @@ int main(void) thpsize / 1024); tests += 3; } +#ifdef __NR_userfaultfd tests += 1; +#endif /* __NR_userfaultfd */ ksft_print_header(); ksft_set_plan(tests); @@ -364,7 +368,9 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. */ +#ifdef __NR_userfaultfd test_uffdio_copy(); +#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 1e5731bab499..4417eaa5cfb7 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,7 +3,6 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> -#include <asm-generic/unistd.h> static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index a4683f2476f2..35565af308af 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <sys/ptrace.h> #include <setjmp.h> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 333c468c2699..da7e26668103 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -220,7 +220,7 @@ run_test() { if test_selected ${CATEGORY}; then # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. - if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then + if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then echo 3 > /proc/sys/vm/drop_caches sleep 2 echo 1 > /proc/sys/vm/compact_memory diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 717539eddf98..7ad6ba660c7d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { +#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); +#else + return -1; +#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index a4b83280998a..944d559ade21 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,11 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). */ -#include <asm-generic/unistd.h> + #include "uffd-common.h" uint64_t features; +#ifdef __NR_userfaultfd #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -471,3 +472,15 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 9ff71fa1f9bf..74c8bc02b506 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,11 +5,12 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ -#include <asm-generic/unistd.h> #include "uffd-common.h" #include "../../../../mm/gup_test.h" +#ifdef __NR_userfaultfd + /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1558,3 +1559,14 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 73ee88d6b043..5916f3b81c39 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh index 3885a2a91f7d..baed5e380dae 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -20,6 +20,7 @@ ALL_TESTS=" test_port_range_ipv4_tcp test_port_range_ipv6_udp test_port_range_ipv6_tcp + test_port_range_ipv4_udp_drop " NUM_NETIFS=4 @@ -194,6 +195,51 @@ test_port_range_ipv6_tcp() __test_port_range $proto $ip_proto $sip $dip $mode "$name" } +test_port_range_ipv4_udp_drop() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP Drop" + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=2000 + local sport_max=3000 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport=5000 + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport \ + action drop + + # Test ports outside range - should pass + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_min - 1)),dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$dport" + + # Test ports inside range - should be dropped + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport" + + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Filter did not drop the expected number of packets" + + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index bc6b6762baf3..c22623b9a2a5 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk + +include ../bpf.mk diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c new file mode 100644 index 000000000000..e73fab3edd9f --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d8..1559ba275105 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06..4788641717d9 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 785e3875a6da..784d1b46912b 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h index de31a0143139..f02f411d550d 100644 --- a/tools/testing/selftests/rseq/rseq-riscv-bits.h +++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h @@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") #endif - RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3) RSEQ_INJECT_ASM(4) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ @@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [ptr] "r" (ptr), - [off] "er" (off), - [inc] "er" (inc) + [off] "r" (off), + [inc] "r" (inc) RSEQ_INJECT_INPUT : "memory", RSEQ_ASM_TMP_REG_1 RSEQ_INJECT_CLOBBER diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 37e598d0a365..67d544aaa9a3 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -158,7 +158,7 @@ do { \ "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \ "333:\n" -#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \ +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \ RSEQ_ASM_OP_R_ADD(off) \ REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ diff --git a/tools/testing/selftests/sched_ext/create_dsq.c b/tools/testing/selftests/sched_ext/create_dsq.c index fa946d9146d4..d67431f57ac6 100644 --- a/tools/testing/selftests/sched_ext/create_dsq.c +++ b/tools/testing/selftests/sched_ext/create_dsq.c @@ -14,11 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct create_dsq *skel; - skel = create_dsq__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = create_dsq__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(create_dsq__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c index e65d22f23f3b..b6d13496b24e 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct ddsp_bogus_dsq_fail *skel; - skel = ddsp_bogus_dsq_fail__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = ddsp_bogus_dsq_fail__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(ddsp_bogus_dsq_fail__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c index abafee587cd6..af9ce4ee8baa 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c @@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct ddsp_vtimelocal_fail *skel; - skel = ddsp_vtimelocal_fail__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = ddsp_vtimelocal_fail__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(ddsp_vtimelocal_fail__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index fbda6bf54671..c02b2aa6fc64 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - if (p->nr_cpus_allowed == nr_cpus) + if (p->nr_cpus_allowed == nr_cpus && !is_migration_disabled(p)) target = bpf_get_prandom_u32() % nr_cpus; else target = scx_bpf_task_cpu(p); diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 0ff27e57fe43..e1f2ce4abfe6 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -15,6 +15,7 @@ static enum scx_test_status setup(void **ctx) skel = dsp_local_on__open(); SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); SCX_FAIL_IF(dsp_local_on__load(skel), "Failed to load skel"); diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c index 73e679953e27..d3387ae03679 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct enq_last_no_enq_fails *skel; - skel = enq_last_no_enq_fails__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = enq_last_no_enq_fails__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_last_no_enq_fails__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c index dd1350e5f002..a80e3a3b3698 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct enq_select_cpu_fails *skel; - skel = enq_select_cpu_fails__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = enq_select_cpu_fails__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 31bcd06e21cd..9451782689de 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -23,6 +23,7 @@ static enum scx_test_status run(void *ctx) char buf[16]; skel = exit__open(); + SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; exit__load(skel); link = bpf_map__attach_struct_ops(skel->maps.exit_ops); diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 87bf220b1bce..1c9ceb661c43 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -49,8 +49,10 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined) SCX_ASSERT(is_cpu_online()); - skel = hotplug__open_and_load(); - SCX_ASSERT(skel); + skel = hotplug__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(hotplug__load(skel), "Failed to load skel"); /* Testing the offline -> online path, so go offline before starting */ if (onlining) diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c index 97d45f1e5597..eddf9e0e26e7 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.c @@ -15,22 +15,6 @@ #define SCHED_EXT 7 -static struct init_enable_count * -open_load_prog(bool global) -{ - struct init_enable_count *skel; - - skel = init_enable_count__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); - - if (!global) - skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL; - - SCX_BUG_ON(init_enable_count__load(skel), "Failed to load skel"); - - return skel; -} - static enum scx_test_status run_test(bool global) { struct init_enable_count *skel; @@ -40,7 +24,14 @@ static enum scx_test_status run_test(bool global) struct sched_param param = {}; pid_t pids[num_pre_forks]; - skel = open_load_prog(global); + skel = init_enable_count__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + + if (!global) + skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL; + + SCX_FAIL_IF(init_enable_count__load(skel), "Failed to load skel"); /* * Fork a bunch of children before we attach the scheduler so that we @@ -159,7 +150,7 @@ static enum scx_test_status run(void *ctx) struct scx_test init_enable_count = { .name = "init_enable_count", - .description = "Verify we do the correct amount of counting of init, " + .description = "Verify we correctly count the occurrences of init, " "enable, etc callbacks.", .run = run, }; diff --git a/tools/testing/selftests/sched_ext/maximal.c b/tools/testing/selftests/sched_ext/maximal.c index f38fc973c380..c6be50a9941d 100644 --- a/tools/testing/selftests/sched_ext/maximal.c +++ b/tools/testing/selftests/sched_ext/maximal.c @@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct maximal *skel; - skel = maximal__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = maximal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(maximal__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c index 31cfafb0cf65..aacf0c58ca4f 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.c +++ b/tools/testing/selftests/sched_ext/maybe_null.c @@ -43,7 +43,7 @@ static enum scx_test_status run(void *ctx) struct scx_test maybe_null = { .name = "maybe_null", - .description = "Verify if PTR_MAYBE_NULL work for .dispatch", + .description = "Verify if PTR_MAYBE_NULL works for .dispatch", .run = run, }; REGISTER_SCX_TEST(&maybe_null) diff --git a/tools/testing/selftests/sched_ext/minimal.c b/tools/testing/selftests/sched_ext/minimal.c index 6c5db8ebbf8a..89f7261757ff 100644 --- a/tools/testing/selftests/sched_ext/minimal.c +++ b/tools/testing/selftests/sched_ext/minimal.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct minimal *skel; - skel = minimal__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = minimal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(minimal__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/prog_run.c b/tools/testing/selftests/sched_ext/prog_run.c index 3cd57ef8daaa..05974820ca69 100644 --- a/tools/testing/selftests/sched_ext/prog_run.c +++ b/tools/testing/selftests/sched_ext/prog_run.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct prog_run *skel; - skel = prog_run__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = prog_run__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(prog_run__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/reload_loop.c b/tools/testing/selftests/sched_ext/reload_loop.c index 5cfba2d6e056..308211d80436 100644 --- a/tools/testing/selftests/sched_ext/reload_loop.c +++ b/tools/testing/selftests/sched_ext/reload_loop.c @@ -18,11 +18,10 @@ bool force_exit = false; static enum scx_test_status setup(void **ctx) { - skel = maximal__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = maximal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(maximal__load(skel), "Failed to load skel"); return SCX_TEST_PASS; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c index a53a40c2d2f0..5b6e045e1109 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dfl *skel; - skel = select_cpu_dfl__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dfl__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dfl__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c index 1d85bf4bf3a3..9b5d232efb7f 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dfl_nodispatch *skel; - skel = select_cpu_dfl_nodispatch__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dfl_nodispatch__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dfl_nodispatch__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c index 0309ca8785b3..80283dbc41b7 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch *skel; - skel = select_cpu_dispatch__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c index 47eb6ed7627d..5e72ebbc90a5 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch_bad_dsq *skel; - skel = select_cpu_dispatch_bad_dsq__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch_bad_dsq__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch_bad_dsq__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c index 48ff028a3c46..aa85949478bc 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch_dbl_dsp *skel; - skel = select_cpu_dispatch_dbl_dsp__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch_dbl_dsp__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch_dbl_dsp__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.c index b4629c2364f5..1e9b5c9bfff1 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_vtime *skel; - skel = select_cpu_vtime__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_vtime__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_vtime__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json index 7126ec3485cb..2b61d8d79bde 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json @@ -61,5 +61,30 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4009", + "name": "Reject creation of DRR class with classid TC_H_ROOT", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class drr ffff:ffff", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 2fe5e983cb22..f89d052c730e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -53,7 +53,7 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; - ELF(Word) *gnu_hash; + ELF(Word) *gnu_hash, *gnu_bucket; ELF_HASH_ENTRY *bucket, *chain; ELF_HASH_ENTRY nbucket, nchain; @@ -185,8 +185,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* The bucket array is located after the header (4 uint32) and the bloom * filter (size_t array of gnu_hash[2] elements). */ - vdso_info.bucket = vdso_info.gnu_hash + 4 + - sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; + vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 + + sizeof(size_t) / 4 * vdso_info.gnu_hash[2]; } else { vdso_info.nbucket = hash[0]; vdso_info.nchain = hash[1]; @@ -268,11 +268,11 @@ void *vdso_sym(const char *version, const char *name) if (vdso_info.gnu_hash) { uint32_t h1 = gnu_hash(name), h2, *hashval; - i = vdso_info.bucket[h1 % vdso_info.nbucket]; + i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket]; if (i == 0) return 0; h1 |= 1; - hashval = vdso_info.bucket + vdso_info.nbucket + + hashval = vdso_info.gnu_bucket + vdso_info.nbucket + (i - vdso_info.gnu_hash[1]); for (;; i++) { ELF(Sym) *sym = &vdso_info.symtab[i]; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index dfff8b288265..d0f6d253ac72 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1788,6 +1788,42 @@ static void test_stream_connect_retry_server(const struct test_opts *opts) close(fd); } +static void test_stream_linger_client(const struct test_opts *opts) +{ + struct linger optval = { + .l_onoff = 1, + .l_linger = 1 + }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { + perror("setsockopt(SO_LINGER)"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_linger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1943,6 +1979,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_connect_retry_client, .run_server = test_stream_connect_retry_server, }, + { + .name = "SOCK_STREAM SO_LINGER null-ptr-deref", + .run_client = test_stream_linger_client, + .run_server = test_stream_linger_server, + }, {}, }; diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile index f2552f73a64c..056d212f25cf 100644 --- a/tools/thermal/lib/Makefile +++ b/tools/thermal/lib/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile index 6518b03e05c7..257a56b1899f 100644 --- a/tools/tracing/latency/Makefile +++ b/tools/tracing/latency/Makefile @@ -37,12 +37,6 @@ FEATURE_TESTS += libtracefs FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(LATENCY-COLLECTOR) include $(srctree)/tools/build/Makefile.include diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 8b5101457c70..0b61208db604 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -37,12 +37,6 @@ FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs FEATURE_DISPLAY += libcpupower -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(RTLA) include $(srctree)/tools/build/Makefile.include diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile index 411d62b3d8eb..5b898360ba48 100644 --- a/tools/verification/rv/Makefile +++ b/tools/verification/rv/Makefile @@ -35,12 +35,6 @@ FEATURE_TESTS += libtracefs FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(RV) include $(srctree)/tools/build/Makefile.include |