diff options
Diffstat (limited to 'tools')
459 files changed, 23569 insertions, 4914 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index ce2ee8f1e361..9306726337fe 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -19,7 +19,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_MEMFD_SECRET #include <asm-generic/unistd.h> diff --git a/tools/arch/loongarch/include/uapi/asm/unistd.h b/tools/arch/loongarch/include/uapi/asm/unistd.h index 0c743344e92d..8eeaac0087c3 100644 --- a/tools/arch/loongarch/include/uapi/asm/unistd.h +++ b/tools/arch/loongarch/include/uapi/asm/unistd.h @@ -4,6 +4,5 @@ */ #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_SYS_CLONE3 #include <asm-generic/unistd.h> diff --git a/tools/arch/x86/kcpuid/Makefile b/tools/arch/x86/kcpuid/Makefile index 87b554fab14b..d0b4b0ed10ff 100644 --- a/tools/arch/x86/kcpuid/Makefile +++ b/tools/arch/x86/kcpuid/Makefile @@ -19,6 +19,6 @@ clean : @rm -f kcpuid install : kcpuid - install -d $(DESTDIR)$(BINDIR) + install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(HWDATADIR) install -m 755 -p kcpuid $(DESTDIR)$(BINDIR)/kcpuid - install -m 444 -p cpuid.csv $(HWDATADIR)/cpuid.csv + install -m 444 -p cpuid.csv $(DESTDIR)$(HWDATADIR)/cpuid.csv diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index eaba24320fb2..3f6bca03ad2e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -28,7 +28,7 @@ BTF COMMANDS | **bpftool** **btf help** | | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } -| *FORMAT* := { **raw** | **c** } +| *FORMAT* := { **raw** | **c** [**unsorted**] } | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } @@ -63,7 +63,9 @@ bpftool btf dump *BTF_SRC* pahole. **format** option can be used to override default (raw) output format. Raw - (**raw**) or C-syntax (**c**) output formats are supported. + (**raw**) or C-syntax (**c**) output formats are supported. With C-style + formatting, the output is sorted by default. Use the **unsorted** option + to avoid sorting the output. bpftool btf help Print short help message. diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dfa4f1bebbb3..ba927379eb20 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -204,10 +204,11 @@ ifeq ($(feature-clang-bpf-co-re),1) BUILD_BPF_SKELS := 1 -$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) ifeq ($(VMLINUX_H),) +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ else +$(OUTPUT)vmlinux.h: $(VMLINUX_H) $(Q)cp "$(VMLINUX_H)" $@ endif diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 04afe2ac2228..be99d49b8714 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -930,6 +930,9 @@ _bpftool() format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; + c) + COMPREPLY=( $( compgen -W "unsorted" -- "$cur" ) ) + ;; *) # emit extra options case ${words[3]} in diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 91fcb75babe3..6789c7a4d5ca 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -20,6 +20,8 @@ #include "json_writer.h" #include "main.h" +#define KFUNC_DECL_TAG "bpf_kfunc" + static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", @@ -43,6 +45,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_ENUM64] = "ENUM64", }; +struct sort_datum { + int index; + int type_rank; + const char *sort_name; + const char *own_name; +}; + static const char *btf_int_enc_str(__u8 encoding) { switch (encoding) { @@ -454,15 +463,171 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) +{ + LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); + int cnt = btf__type_cnt(btf); + int i; + + printf("\n/* BPF kfuncs */\n"); + printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); + + for (i = 1; i < cnt; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + const char *name; + int err; + + if (!btf_is_decl_tag(t)) + continue; + + if (btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG))) + continue; + + t = btf__type_by_id(btf, t->type); + if (!btf_is_func(t)) + continue; + + printf("extern "); + + opts.field_name = btf__name_by_offset(btf, t->name_off); + err = btf_dump__emit_type_decl(d, t->type, &opts); + if (err) + return err; + + printf(" __weak __ksym;\n"); + } + + printf("#endif\n\n"); + + return 0; +} + static void __printf(2, 0) btf_dump_printf(void *ctx, const char *fmt, va_list args) { vfprintf(stdout, fmt, args); } +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + const int kind = btf_kind(t); + const int max_rank = 10; + + if (t->name_off) + has_name = true; + + switch (kind) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return has_name ? 1 : 0; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + return 2; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return has_name ? 3 : max_rank; + case BTF_KIND_FUNC_PROTO: + return has_name ? 4 : max_rank; + case BTF_KIND_ARRAY: + if (has_name) + return btf_type_rank(btf, btf_array(t)->type, has_name); + return max_rank; + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + if (has_name) + return btf_type_rank(btf, t->type, has_name); + return max_rank; + default: + return max_rank; + } +} + +static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool from_ref) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + + switch (btf_kind(t)) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: { + int name_off = t->name_off; + + /* Use name of the first element for anonymous enums if allowed */ + if (!from_ref && !t->name_off && btf_vlen(t)) + name_off = btf_enum(t)->name_off; + + return btf__name_by_offset(btf, name_off); + } + case BTF_KIND_ARRAY: + return btf_type_sort_name(btf, btf_array(t)->type, true); + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + return btf_type_sort_name(btf, t->type, true); + default: + return btf__name_by_offset(btf, t->name_off); + } + return NULL; +} + +static int btf_type_compare(const void *left, const void *right) +{ + const struct sort_datum *d1 = (const struct sort_datum *)left; + const struct sort_datum *d2 = (const struct sort_datum *)right; + int r; + + if (d1->type_rank != d2->type_rank) + return d1->type_rank < d2->type_rank ? -1 : 1; + + r = strcmp(d1->sort_name, d2->sort_name); + if (r) + return r; + + return strcmp(d1->own_name, d2->own_name); +} + +static struct sort_datum *sort_btf_c(const struct btf *btf) +{ + struct sort_datum *datums; + int n; + + n = btf__type_cnt(btf); + datums = malloc(sizeof(struct sort_datum) * n); + if (!datums) + return NULL; + + for (int i = 0; i < n; ++i) { + struct sort_datum *d = datums + i; + const struct btf_type *t = btf__type_by_id(btf, i); + + d->index = i; + d->type_rank = btf_type_rank(btf, i, false); + d->sort_name = btf_type_sort_name(btf, i, false); + d->own_name = btf__name_by_offset(btf, t->name_off); + } + + qsort(datums, n, sizeof(struct sort_datum), btf_type_compare); + + return datums; +} + static int dump_btf_c(const struct btf *btf, - __u32 *root_type_ids, int root_type_cnt) + __u32 *root_type_ids, int root_type_cnt, bool sort_dump) { + struct sort_datum *datums = NULL; struct btf_dump *d; int err = 0, i; @@ -476,6 +641,12 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); printf("#endif\n\n"); + printf("#ifndef __ksym\n"); + printf("#define __ksym __attribute__((section(\".ksyms\")))\n"); + printf("#endif\n\n"); + printf("#ifndef __weak\n"); + printf("#define __weak __attribute__((weak))\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { @@ -486,11 +657,19 @@ static int dump_btf_c(const struct btf *btf, } else { int cnt = btf__type_cnt(btf); + if (sort_dump) + datums = sort_btf_c(btf); for (i = 1; i < cnt; i++) { - err = btf_dump__dump_type(d, i); + int idx = datums ? datums[i].index : i; + + err = btf_dump__dump_type(d, idx); if (err) goto done; } + + err = dump_btf_kfuncs(d, btf); + if (err) + goto done; } printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); @@ -500,6 +679,7 @@ static int dump_btf_c(const struct btf *btf, printf("#endif /* __VMLINUX_H__ */\n"); done: + free(datums); btf_dump__free(d); return err; } @@ -549,10 +729,10 @@ static bool btf_is_kernel_module(__u32 btf_id) static int do_dump(int argc, char **argv) { + bool dump_c = false, sort_dump_c = true; struct btf *btf = NULL, *base = NULL; __u32 root_type_ids[2]; int root_type_cnt = 0; - bool dump_c = false; __u32 btf_id = -1; const char *src; int fd = -1; @@ -663,6 +843,9 @@ static int do_dump(int argc, char **argv) goto done; } NEXT_ARG(); + } else if (is_prefix(*argv, "unsorted")) { + sort_dump_c = false; + NEXT_ARG(); } else { p_err("unrecognized option: '%s'", *argv); err = -EINVAL; @@ -691,7 +874,7 @@ static int do_dump(int argc, char **argv) err = -ENOTSUP; goto done; } - err = dump_btf_c(btf, root_type_ids, root_type_cnt); + err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c); } else { err = dump_btf_raw(btf, root_type_ids, root_type_cnt); } @@ -1063,7 +1246,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" - " FORMAT := { raw | c }\n" + " FORMAT := { raw | c [unsorted] }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS " |\n" diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index af6898c0f388..9af426d43299 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -19,6 +19,38 @@ #include "main.h" +static const int cgroup_attach_types[] = { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_RELEASE, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_INET4_GETPEERNAME, + BPF_CGROUP_INET6_GETPEERNAME, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_INET4_GETSOCKNAME, + BPF_CGROUP_INET6_GETSOCKNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UDP4_RECVMSG, + BPF_CGROUP_UDP6_RECVMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_SOCK_OPS, + BPF_CGROUP_DEVICE, + BPF_CGROUP_SYSCTL, + BPF_CGROUP_GETSOCKOPT, + BPF_CGROUP_SETSOCKOPT, + BPF_LSM_CGROUP +}; + #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" @@ -183,13 +215,13 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) static int cgroup_has_attached_progs(int cgroup_fd) { - enum bpf_attach_type type; + unsigned int i = 0; bool no_prog = true; - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - int count = count_attached_bpf_progs(cgroup_fd, type); + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { + int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); - if (count < 0 && errno != EINVAL) + if (count < 0) return -1; if (count > 0) { diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 958e92acca8e..9b75639434b8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -410,7 +410,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, { const char *prog_name = prog_info->name; const struct btf_type *func_type; - const struct bpf_func_info finfo = {}; + struct bpf_func_info finfo = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); struct btf *prog_btf = NULL; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b3979ddc0189..5a4d3240689e 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -848,28 +848,45 @@ out: } static void -codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) +codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool populate_links) { struct bpf_map *map; char ident[256]; - size_t i; + size_t i, map_sz; if (!map_cnt) return; + /* for backward compatibility with old libbpf versions that don't + * handle new BPF skeleton with new struct bpf_map_skeleton definition + * that includes link field, avoid specifying new increased size, + * unless we absolutely have to (i.e., if there are struct_ops maps + * present) + */ + map_sz = offsetof(struct bpf_map_skeleton, link); + if (populate_links) { + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + map_sz = sizeof(struct bpf_map_skeleton); + break; + } + } + } + codegen("\ \n\ - \n\ + \n\ /* maps */ \n\ s->map_cnt = %zu; \n\ - s->map_skel_sz = sizeof(*s->maps); \n\ - s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ + s->map_skel_sz = %zu; \n\ + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt,\n\ + sizeof(*s->maps) > %zu ? sizeof(*s->maps) : %zu);\n\ if (!s->maps) { \n\ err = -ENOMEM; \n\ goto err; \n\ } \n\ ", - map_cnt + map_cnt, map_sz, map_sz, map_sz ); i = 0; bpf_object__for_each_map(map, obj) { @@ -878,15 +895,22 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) codegen("\ \n\ - \n\ - s->maps[%zu].name = \"%s\"; \n\ - s->maps[%zu].map = &obj->maps.%s; \n\ + \n\ + map = (struct bpf_map_skeleton *)((char *)s->maps + %zu * s->map_skel_sz);\n\ + map->name = \"%s\"; \n\ + map->map = &obj->maps.%s; \n\ ", - i, bpf_map__name(map), i, ident); + i, bpf_map__name(map), ident); /* memory-mapped internal maps */ if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) { - printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", - i, ident); + printf("\tmap->mmaped = (void **)&obj->%s;\n", ident); + } + + if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + codegen("\ + \n\ + map->link = &obj->links.%s; \n\ + ", ident); } i++; } @@ -1141,7 +1165,7 @@ static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj) static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; - size_t map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; + size_t map_cnt = 0, prog_cnt = 0, attach_map_cnt = 0, file_sz, mmap_sz; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; @@ -1225,6 +1249,10 @@ static int do_skeleton(int argc, char **argv) bpf_map__name(map)); continue; } + + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) + attach_map_cnt++; + map_cnt++; } bpf_object__for_each_program(prog, obj) { @@ -1260,6 +1288,8 @@ static int do_skeleton(int argc, char **argv) #include <stdlib.h> \n\ #include <bpf/libbpf.h> \n\ \n\ + #define BPF_SKEL_SUPPORTS_MAP_AUTO_ATTACH 1 \n\ + \n\ struct %1$s { \n\ struct bpf_object_skeleton *skeleton; \n\ struct bpf_object *obj; \n\ @@ -1297,6 +1327,9 @@ static int do_skeleton(int argc, char **argv) bpf_program__name(prog)); } printf("\t} progs;\n"); + } + + if (prog_cnt + attach_map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { if (use_loader) @@ -1306,6 +1339,19 @@ static int do_skeleton(int argc, char **argv) printf("\t\tstruct bpf_link *%s;\n", bpf_program__name(prog)); } + + bpf_object__for_each_map(map, obj) { + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + + if (use_loader) + printf("t\tint %s_fd;\n", ident); + else + printf("\t\tstruct bpf_link *%s;\n", ident); + } + printf("\t} links;\n"); } @@ -1433,6 +1479,7 @@ static int do_skeleton(int argc, char **argv) %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ struct bpf_object_skeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ @@ -1448,7 +1495,7 @@ static int do_skeleton(int argc, char **argv) obj_name ); - codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/, true /*links*/); codegen_progs_skeleton(obj, prog_cnt, true /*populate_links*/); codegen("\ @@ -1723,6 +1770,7 @@ static int do_subskeleton(int argc, char **argv) { \n\ struct %1$s *obj; \n\ struct bpf_object_subskeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ @@ -1786,7 +1834,7 @@ static int do_subskeleton(int argc, char **argv) } } - codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/, false /*links*/); codegen_progs_skeleton(obj, prog_cnt, false /*links*/); codegen("\ @@ -2379,15 +2427,6 @@ out: return err; } -static int btfgen_remap_id(__u32 *type_id, void *ctx) -{ - unsigned int *ids = ctx; - - *type_id = ids[*type_id]; - - return 0; -} - /* Generate BTF from relocation information previously recorded */ static struct btf *btfgen_get_btf(struct btfgen_info *info) { @@ -2467,10 +2506,15 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info) /* second pass: fix up type ids */ for (i = 1; i < btf__type_cnt(btf_new); i++) { struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i); + struct btf_field_iter it; + __u32 *type_id; - err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids); + err = btf_field_iter_init(&it, btf_type, BTF_FIELD_ITER_IDS); if (err) goto err_out; + + while ((type_id = btf_field_iter_next(&it))) + *type_id = ids[*type_id]; } free(ids); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 1a501cf09e78..2ff949ea82fa 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1813,6 +1813,10 @@ offload_dev: } if (pinmaps) { + err = create_and_mount_bpffs_dir(pinmaps); + if (err) + goto err_unpin; + err = bpf_object__pin_maps(obj, pinmaps); if (err) { p_err("failed to pin all maps"); @@ -2485,7 +2489,7 @@ static int do_help(int argc, char **argv) " cgroup/connect_unix | cgroup/getpeername4 | cgroup/getpeername6 |\n" " cgroup/getpeername_unix | cgroup/getsockname4 | cgroup/getsockname6 |\n" " cgroup/getsockname_unix | cgroup/sendmsg4 | cgroup/sendmsg6 |\n" - " cgroup/sendmsg°unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n" + " cgroup/sendmsg_unix | cgroup/recvmsg4 | cgroup/recvmsg6 | cgroup/recvmsg_unix |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { sk_msg_verdict | sk_skb_verdict | sk_skb_stream_verdict |\n" diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index 7bdbcac3cf62..948dde25034e 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -29,6 +29,7 @@ enum bpf_link_type___local { }; extern const void bpf_link_fops __ksym; +extern const void bpf_link_fops_poll __ksym __weak; extern const void bpf_map_fops __ksym; extern const void bpf_prog_fops __ksym; extern const void btf_fops __ksym; @@ -84,7 +85,11 @@ int iter(struct bpf_iter__task_file *ctx) fops = &btf_fops; break; case BPF_OBJ_LINK: - fops = &bpf_link_fops; + if (&bpf_link_fops_poll && + file->f_op == &bpf_link_fops_poll) + fops = &bpf_link_fops_poll; + else + fops = &bpf_link_fops; break; default: return 0; diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 2f80edc682f1..f48c783cb9f7 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -40,17 +40,17 @@ struct { const volatile __u32 num_cpu = 1; const volatile __u32 num_metric = 1; -#define MAX_NUM_MATRICS 4 +#define MAX_NUM_METRICS 4 SEC("fentry/XXX") int BPF_PROG(fentry_XXX) { - struct bpf_perf_event_value___local *ptrs[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local *ptrs[MAX_NUM_METRICS]; u32 key = bpf_get_smp_processor_id(); u32 i; /* look up before reading, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { u32 flag = i; ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); @@ -58,7 +58,7 @@ int BPF_PROG(fentry_XXX) return 0; } - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { struct bpf_perf_event_value___local reading; int err; @@ -99,14 +99,14 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after) SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { - struct bpf_perf_event_value___local readings[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local readings[MAX_NUM_METRICS]; u32 cpu = bpf_get_smp_processor_id(); u32 i, zero = 0; int err; u64 *count; /* read all events before updating the maps, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, (void *)(readings + i), sizeof(*readings)); @@ -116,7 +116,7 @@ int BPF_PROG(fexit_XXX) count = bpf_map_lookup_elem(&counts, &zero); if (count) { *count += 1; - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) fexit_update_maps(i, &readings[i]); } return 0; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index af393c7dee1f..d54aaa0619df 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -409,6 +409,14 @@ static int elf_collect(struct object *obj) obj->efile.idlist = data; obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; + } else if (!strcmp(name, BTF_BASE_ELF_SEC)) { + /* If a .BTF.base section is found, do not resolve + * BTF ids relative to vmlinux; resolve relative + * to the .BTF.base section instead. btf__parse_split() + * will take care of this once the base BTF it is + * passed is NULL. + */ + obj->base_btf_path = NULL; } if (compressed_section_fix(elf, scn, &sh)) @@ -696,7 +704,7 @@ static int sets_patch(struct object *obj) * Make sure id is at the beginning of the pairs * struct, otherwise the below qsort would not work. */ - BUILD_BUG_ON(set8->pairs != &set8->pairs[0].id); + BUILD_BUG_ON((u32 *)set8->pairs != &set8->pairs[0].id); qsort(set8->pairs, set8->cnt, sizeof(set8->pairs[0]), cmp_id); /* diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 4af6a9582f15..12796808f07a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -246,7 +246,7 @@ $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent $(OUTPUT)test-libtracefs.bin: - $(BUILD) $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null) -ltracefs + $(BUILD) $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null) -ltracefs $(OUTPUT)test-libcrypto.bin: $(BUILD) -lcrypto diff --git a/tools/gpio/gpio-sloppy-logic-analyzer.sh b/tools/gpio/gpio-sloppy-logic-analyzer.sh new file mode 100755 index 000000000000..ed21a110df5e --- /dev/null +++ b/tools/gpio/gpio-sloppy-logic-analyzer.sh @@ -0,0 +1,246 @@ +#!/bin/sh -eu +# SPDX-License-Identifier: GPL-2.0 +# +# Helper script for the Linux Kernel GPIO sloppy logic analyzer +# +# Copyright (C) Wolfram Sang <wsa@sang-engineering.com> +# Copyright (C) Renesas Electronics Corporation + +samplefreq=1000000 +numsamples=250000 +cpusetdefaultdir='/sys/fs/cgroup' +cpusetprefix='cpuset.' +debugdir='/sys/kernel/debug' +ladirname='gpio-sloppy-logic-analyzer' +outputdir="$PWD" +neededcmds='taskset zip' +max_chans=8 +duration= +initcpu= +listinstances=0 +lainstance= +lasysfsdir= +triggerdat= +trigger_bindat= +progname="${0##*/}" +print_help() +{ + cat << EOF +$progname - helper script for the Linux Kernel Sloppy GPIO Logic Analyzer +Available options: + -c|--cpu <n>: which CPU to isolate for sampling. Only needed once. Default <1>. + Remember that a more powerful CPU gives you higher sampling speeds. + Also CPU0 is not recommended as it usually does extra bookkeeping. + -d|--duration-us <SI-n>: number of microseconds to sample. Overrides -n, no default value. + -h|--help: print this help + -i|--instance <str>: name of the logic analyzer in case you have multiple instances. Default + to first instance found + -k|--kernel-debug-dir <str>: path to the kernel debugfs mountpoint. Default: <$debugdir> + -l|--list-instances: list all available instances + -n|--num_samples <SI-n>: number of samples to acquire. Default <$numsamples> + -o|--output-dir <str>: directory to put the result files. Default: current dir + -s|--sample_freq <SI-n>: desired sampling frequency. Might be capped if too large. + Default: <1000000> + -t|--trigger <str>: pattern to use as trigger. <str> consists of two-char pairs. First + char is channel number starting at "1". Second char is trigger level: + "L" - low; "H" - high; "R" - rising; "F" - falling + These pairs can be combined with "+", so "1H+2F" triggers when probe 1 + is high while probe 2 has a falling edge. You can have multiple triggers + combined with ",". So, "1H+2F,1H+2R" is like the example before but it + waits for a rising edge on probe 2 while probe 1 is still high after the + first trigger has been met. + Trigger data will only be used for the next capture and then be erased. + +<SI-n> is an integer value where SI units "T", "G", "M", "K" are recognized, e.g. '1M500K' is 1500000. + +Examples: +Samples $numsamples values at 1MHz with an already prepared CPU or automatically prepares CPU1 if needed, +use the first logic analyzer instance found: + '$progname' +Samples 50us at 2MHz waiting for a falling edge on channel 2. CPU and instance as above: + '$progname -d 50 -s 2M -t "2F"' + +Note that the process exits after checking all parameters but a sub-process still works in +the background. The result is only available once the sub-process finishes. + +Result is a .sr file to be consumed with PulseView from the free Sigrok project. It is +a zip file which also contains the binary sample data which may be consumed by others. +The filename is the logic analyzer instance name plus a since-epoch timestamp. +EOF +} + +fail() +{ + echo "$1" + exit 1 +} + +parse_si() +{ + conv_si="$(printf $1 | sed 's/[tT]+\?/*1000G+/g; s/[gG]+\?/*1000M+/g; s/[mM]+\?/*1000K+/g; s/[kK]+\?/*1000+/g; s/+$//')" + si_val="$((conv_si))" +} +set_newmask() +{ + for f in $(find "$1" -iname "$2"); do echo "$newmask" > "$f" 2>/dev/null || true; done +} + +init_cpu() +{ + isol_cpu="$1" + + [ -d "$lacpusetdir" ] || mkdir "$lacpusetdir" + + cur_cpu=$(cat "${lacpusetfile}cpus") + [ "$cur_cpu" = "$isol_cpu" ] && return + [ -z "$cur_cpu" ] || fail "CPU$isol_cpu requested but CPU$cur_cpu already isolated" + + echo "$isol_cpu" > "${lacpusetfile}cpus" || fail "Could not isolate CPU$isol_cpu. Does it exist?" + echo 1 > "${lacpusetfile}cpu_exclusive" + echo 0 > "${lacpusetfile}mems" + + oldmask=$(cat /proc/irq/default_smp_affinity) + newmask=$(printf "%x" $((0x$oldmask & ~(1 << isol_cpu)))) + + set_newmask '/proc/irq' '*smp_affinity' + set_newmask '/sys/devices/virtual/workqueue/' 'cpumask' + + # Move tasks away from isolated CPU + for p in $(ps -o pid | tail -n +2); do + mask=$(taskset -p "$p") || continue + # Ignore tasks with a custom mask, i.e. not equal $oldmask + [ "${mask##*: }" = "$oldmask" ] || continue + taskset -p "$newmask" "$p" || continue + done 2>/dev/null >/dev/null + + # Big hammer! Working with 'rcu_momentary_dyntick_idle()' for a more fine-grained solution + # still printed warnings. Same for re-enabling the stall detector after sampling. + echo 1 > /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress + + cpufreqgov="/sys/devices/system/cpu/cpu$isol_cpu/cpufreq/scaling_governor" + [ -w "$cpufreqgov" ] && echo 'performance' > "$cpufreqgov" || true +} + +parse_triggerdat() +{ + oldifs="$IFS" + IFS=','; for trig in $1; do + mask=0; val1=0; val2=0 + IFS='+'; for elem in $trig; do + chan=${elem%[lhfrLHFR]} + mode=${elem#$chan} + # Check if we could parse something and the channel number fits + [ "$chan" != "$elem" ] && [ "$chan" -le $max_chans ] || fail "Trigger syntax error: $elem" + bit=$((1 << (chan - 1))) + mask=$((mask | bit)) + case $mode in + [hH]) val1=$((val1 | bit)); val2=$((val2 | bit));; + [fF]) val1=$((val1 | bit));; + [rR]) val2=$((val2 | bit));; + esac + done + trigger_bindat="$trigger_bindat$(printf '\\%o\\%o' $mask $val1)" + [ $val1 -ne $val2 ] && trigger_bindat="$trigger_bindat$(printf '\\%o\\%o' $mask $val2)" + done + IFS="$oldifs" +} + +do_capture() +{ + taskset "$1" echo 1 > "$lasysfsdir"/capture || fail "Capture error! Check kernel log" + + srtmp=$(mktemp -d) + echo 1 > "$srtmp"/version + cp "$lasysfsdir"/sample_data "$srtmp"/logic-1-1 + cat > "$srtmp"/metadata << EOF +[global] +sigrok version=0.2.0 + +[device 1] +capturefile=logic-1 +total probes=$(wc -l < "$lasysfsdir"/meta_data) +samplerate=${samplefreq}Hz +unitsize=1 +EOF + cat "$lasysfsdir"/meta_data >> "$srtmp"/metadata + + zipname="$outputdir/${lasysfsdir##*/}-$(date +%s).sr" + zip -jq "$zipname" "$srtmp"/* + rm -rf "$srtmp" + delay_ack=$(cat "$lasysfsdir"/delay_ns_acquisition) + [ "$delay_ack" -eq 0 ] && delay_ack=1 + echo "Logic analyzer done. Saved '$zipname'" + echo "Max sample frequency this time: $((1000000000 / delay_ack))Hz." +} + +rep=$(getopt -a -l cpu:,duration-us:,help,instance:,list-instances,kernel-debug-dir:,num_samples:,output-dir:,sample_freq:,trigger: -o c:d:hi:k:ln:o:s:t: -- "$@") || exit 1 +eval set -- "$rep" +while true; do + case "$1" in + -c|--cpu) initcpu="$2"; shift;; + -d|--duration-us) parse_si $2; duration=$si_val; shift;; + -h|--help) print_help; exit 0;; + -i|--instance) lainstance="$2"; shift;; + -k|--kernel-debug-dir) debugdir="$2"; shift;; + -l|--list-instances) listinstances=1;; + -n|--num_samples) parse_si $2; numsamples=$si_val; shift;; + -o|--output-dir) outputdir="$2"; shift;; + -s|--sample_freq) parse_si $2; samplefreq=$si_val; shift;; + -t|--trigger) triggerdat="$2"; shift;; + --) break;; + *) fail "error parsing command line: $*";; + esac + shift +done + +for f in $neededcmds; do + command -v "$f" >/dev/null || fail "Command '$f' not found" +done + +# print cpuset mountpoint if any, errorcode > 0 if noprefix option was found +cpusetdir=$(awk '$3 == "cgroup" && $4 ~ /cpuset/ { print $2; exit (match($4, /noprefix/) > 0) }' /proc/self/mounts) || cpusetprefix='' +if [ -z "$cpusetdir" ]; then + cpusetdir="$cpusetdefaultdir" + [ -d $cpusetdir ] || mkdir $cpusetdir + mount -t cgroup -o cpuset none $cpusetdir || fail "Couldn't mount cpusets. Not in kernel or already in use?" +fi + +lacpusetdir="$cpusetdir/$ladirname" +lacpusetfile="$lacpusetdir/$cpusetprefix" +sysfsdir="$debugdir/$ladirname" + +[ "$samplefreq" -ne 0 ] || fail "Invalid sample frequency" + +[ -d "$sysfsdir" ] || fail "Could not find logic analyzer root dir '$sysfsdir'. Module loaded?" +[ -x "$sysfsdir" ] || fail "Could not access logic analyzer root dir '$sysfsdir'. Need root?" + +[ $listinstances -gt 0 ] && find "$sysfsdir" -mindepth 1 -type d | sed 's|.*/||' && exit 0 + +if [ -n "$lainstance" ]; then + lasysfsdir="$sysfsdir/$lainstance" +else + lasysfsdir=$(find "$sysfsdir" -mindepth 1 -type d -print -quit) +fi +[ -d "$lasysfsdir" ] || fail "Logic analyzer directory '$lasysfsdir' not found!" +[ -d "$outputdir" ] || fail "Output directory '$outputdir' not found!" + +[ -n "$initcpu" ] && init_cpu "$initcpu" +[ -d "$lacpusetdir" ] || { echo "Auto-Isolating CPU1"; init_cpu 1; } + +ndelay=$((1000000000 / samplefreq)) +echo "$ndelay" > "$lasysfsdir"/delay_ns + +[ -n "$duration" ] && numsamples=$((samplefreq * duration / 1000000)) +echo $numsamples > "$lasysfsdir"/buf_size + +if [ -n "$triggerdat" ]; then + parse_triggerdat "$triggerdat" + printf "$trigger_bindat" > "$lasysfsdir"/trigger 2>/dev/null || fail "Trigger data '$triggerdat' rejected" +fi + +workcpu=$(cat "${lacpusetfile}effective_cpus") +[ -n "$workcpu" ] || fail "No isolated CPU found" +cpumask=$(printf '%x' $((1 << workcpu))) +instance=${lasysfsdir##*/} +echo "Setting up '$instance': $numsamples samples at ${samplefreq}Hz with ${triggerdat:-no} trigger using CPU$workcpu" +do_capture "$cpumask" & diff --git a/tools/hv/Makefile b/tools/hv/Makefile index bb52871da341..2e60e2c212cd 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -17,6 +17,7 @@ endif MAKEFLAGS += -r override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -Wno-address-of-packed-member ALL_TARGETS := hv_kvp_daemon hv_vss_daemon ifneq ($(ARCH), aarch64) diff --git a/tools/include/asm/rwonce.h b/tools/include/asm/rwonce.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/include/asm/rwonce.h diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 210c13b1b857..2a7f260ef9dc 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -19,7 +19,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); bool __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); -void bitmap_clear(unsigned long *map, unsigned int start, int len); +void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); @@ -150,4 +150,19 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static inline void bitmap_clear(unsigned long *map, unsigned int start, + unsigned int nbits) +{ + if (__builtin_constant_p(nbits) && nbits == 1) + __clear_bit(start, map); + else if (small_const_nbits(start + nbits)) + *map &= ~GENMASK(start + nbits - 1, start); + else if (__builtin_constant_p(start & BITMAP_MEM_MASK) && + IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) && + __builtin_constant_p(nbits & BITMAP_MEM_MASK) && + IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT)) + memset((char *)map + start / 8, 0, nbits / 8); + else + __bitmap_clear(map, start, nbits); +} #endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 8a63a9913495..6f7f22ac9da5 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -62,6 +62,10 @@ #define __nocf_check __attribute__((nocf_check)) #endif +#ifndef __naked +#define __naked __attribute__((__naked__)) +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index dc0fc7125bc3..cad4f2927983 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -12,6 +12,7 @@ #define PHYS_ADDR_MAX (~(phys_addr_t)0) #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) +#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE) #define __va(x) ((void *)((unsigned long)(x))) #define __pa(x) ((unsigned long)(x)) diff --git a/tools/include/linux/numa.h b/tools/include/linux/numa.h index 110b0e5d0fb0..c8b9369335e0 100644 --- a/tools/include/linux/numa.h +++ b/tools/include/linux/numa.h @@ -13,4 +13,9 @@ #define NUMA_NO_NODE (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + #endif /* _LINUX_NUMA_H */ diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h index 2e6338ac5eed..e530e54046c9 100644 --- a/tools/include/linux/poison.h +++ b/tools/include/linux/poison.h @@ -47,11 +47,8 @@ * Magic nums for obj red zoning. * Placed in the first word before and the first word after an obj. */ -#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ -#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ - -#define SLUB_RED_INACTIVE 0xbb -#define SLUB_RED_ACTIVE 0xcc +#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */ +#define SLUB_RED_ACTIVE 0xcc /* when obj is active */ /* ...and for poisoning */ #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index 6665e272e213..cd79ddd6170e 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -96,6 +96,10 @@ typedef uint64_t uintmax_t; #define UINT_FAST32_MAX SIZE_MAX #define UINT_FAST64_MAX UINT64_MAX +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + #ifndef INT_MIN #define INT_MIN (-__INT_MAX__ - 1) #endif @@ -110,4 +114,19 @@ typedef uint64_t uintmax_t; #define LONG_MAX __LONG_MAX__ #endif +#ifndef ULONG_MAX +#define ULONG_MAX ((unsigned long)(__LONG_MAX__) * 2 + 1) +#endif + +#ifndef LLONG_MIN +#define LLONG_MIN (-__LONG_LONG_MAX__ - 1) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX __LONG_LONG_MAX__ +#endif + +#ifndef ULLONG_MAX +#define ULLONG_MAX ((unsigned long long)(__LONG_LONG_MAX__) * 2 + 1) +#endif + #endif /* _NOLIBC_STDINT_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 16cd4d807251..c968dbbc4ef8 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -376,6 +376,16 @@ int setvbuf(FILE *stream __attribute__((unused)), return 0; } +static __attribute__((unused)) +const char *strerror(int errno) +{ + static char buf[18] = "errno="; + + i64toa_r(errno, &buf[6]); + + return buf; +} + /* make sure to include all global symbols */ #include "nolibc.h" diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 5be9d3c7435a..75aa273c23a6 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -438,6 +438,115 @@ char *u64toa(uint64_t in) return itoa_buffer; } +static __attribute__((unused)) +uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit) +{ + const char signed_ = lower_limit != 0; + unsigned char neg = 0, overflow = 0; + uintmax_t val = 0, limit, old_val; + char c; + + if (base < 0 || base > 36) { + SET_ERRNO(EINVAL); + goto out; + } + + while (isspace(*nptr)) + nptr++; + + if (*nptr == '+') { + nptr++; + } else if (*nptr == '-') { + neg = 1; + nptr++; + } + + if (signed_ && neg) + limit = -(uintmax_t)lower_limit; + else + limit = upper_limit; + + if ((base == 0 || base == 16) && + (strncmp(nptr, "0x", 2) == 0 || strncmp(nptr, "0X", 2) == 0)) { + base = 16; + nptr += 2; + } else if (base == 0 && strncmp(nptr, "0", 1) == 0) { + base = 8; + nptr += 1; + } else if (base == 0) { + base = 10; + } + + while (*nptr) { + c = *nptr; + + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'a' && c <= 'z') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + c = c - 'A' + 10; + else + goto out; + + if (c >= base) + goto out; + + nptr++; + old_val = val; + val *= base; + val += c; + + if (val > limit || val < old_val) + overflow = 1; + } + +out: + if (overflow) { + SET_ERRNO(ERANGE); + val = limit; + } + if (endptr) + *endptr = (char *)nptr; + return neg ? -val : val; +} + +static __attribute__((unused)) +long strtol(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, LONG_MIN, LONG_MAX); +} + +static __attribute__((unused)) +unsigned long strtoul(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, ULONG_MAX); +} + +static __attribute__((unused)) +long long strtoll(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, LLONG_MIN, LLONG_MAX); +} + +static __attribute__((unused)) +unsigned long long strtoull(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, ULLONG_MAX); +} + +static __attribute__((unused)) +intmax_t strtoimax(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, INTMAX_MIN, INTMAX_MAX); +} + +static __attribute__((unused)) +uintmax_t strtoumax(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); +} + /* make sure to include all global symbols */ #include "nolibc.h" diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index d983c48a3b6a..a00d53d02723 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -776,12 +776,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount) __SYSCALL(__NR_fspick, sys_fspick) #define __NR_pidfd_open 434 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) - -#ifdef __ARCH_WANT_SYS_CLONE3 #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) -#endif - #define __NR_close_range 436 __SYSCALL(__NR_close_range, sys_close_range) #define __NR_openat2 437 diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90706a47f6ff..35bcf52dbc65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { @@ -6207,12 +6209,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h new file mode 100644 index 000000000000..8a27bc5c7a7f --- /dev/null +++ b/tools/include/uapi/linux/fs.h @@ -0,0 +1,552 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FS_H +#define _UAPI_LINUX_FS_H + +/* + * This file has definitions for some important file table structures + * and constants and structures used by various generic file system + * ioctl's. Please do not make any changes in this file before + * sending patches for review to linux-fsdevel@vger.kernel.org and + * linux-api@vger.kernel.org. + */ + +#include <linux/limits.h> +#include <linux/ioctl.h> +#include <linux/types.h> +#ifndef __KERNEL__ +#include <linux/fscrypt.h> +#endif + +/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */ +#if !defined(__KERNEL__) +#include <linux/mount.h> +#endif + +/* + * It's silly to have NR_OPEN bigger than NR_FILE, but you can change + * the file limit at runtime and only root can increase the per-process + * nr_file rlimit, so it's safe to set up a ridiculously high absolute + * upper limit on files-per-process. + * + * Some programs (notably those using select()) may have to be + * recompiled to take full advantage of the new limits.. + */ + +/* Fixed constants first: */ +#undef NR_OPEN +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard limit for nfile rlimits */ + +#define BLOCK_SIZE_BITS 10 +#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) + +#define SEEK_SET 0 /* seek relative to beginning of file */ +#define SEEK_CUR 1 /* seek relative to current file position */ +#define SEEK_END 2 /* seek relative to end of file */ +#define SEEK_DATA 3 /* seek to the next data */ +#define SEEK_HOLE 4 /* seek to the next hole */ +#define SEEK_MAX SEEK_HOLE + +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ + +struct file_clone_range { + __s64 src_fd; + __u64 src_offset; + __u64 src_length; + __u64 dest_offset; +}; + +struct fstrim_range { + __u64 start; + __u64 len; + __u64 minlen; +}; + +/* + * We include a length field because some filesystems (vfat) have an identifier + * that we do want to expose as a UUID, but doesn't have the standard length. + * + * We use a fixed size buffer beacuse this interface will, by fiat, never + * support "UUIDs" longer than 16 bytes; we don't want to force all downstream + * users to have to deal with that. + */ +struct fsuuid2 { + __u8 len; + __u8 uuid[16]; +}; + +struct fs_sysfs_path { + __u8 len; + __u8 name[128]; +}; + +/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */ +#define FILE_DEDUPE_RANGE_SAME 0 +#define FILE_DEDUPE_RANGE_DIFFERS 1 + +/* from struct btrfs_ioctl_file_extent_same_info */ +struct file_dedupe_range_info { + __s64 dest_fd; /* in - destination file */ + __u64 dest_offset; /* in - start of extent in destination */ + __u64 bytes_deduped; /* out - total # of bytes we were able + * to dedupe from this file. */ + /* status of this dedupe operation: + * < 0 for error + * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds + * == FILE_DEDUPE_RANGE_DIFFERS if data differs + */ + __s32 status; /* out - see above description */ + __u32 reserved; /* must be zero */ +}; + +/* from struct btrfs_ioctl_file_extent_same_args */ +struct file_dedupe_range { + __u64 src_offset; /* in - start of extent in source */ + __u64 src_length; /* in - length of extent */ + __u16 dest_count; /* in - total elements in info array */ + __u16 reserved1; /* must be zero */ + __u32 reserved2; /* must be zero */ + struct file_dedupe_range_info info[]; +}; + +/* And dynamically-tunable limits and defaults: */ +struct files_stat_struct { + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ +}; + +struct inodes_stat_t { + long nr_inodes; + long nr_unused; + long dummy[5]; /* padding for sysctl ABI compatibility */ +}; + + +#define NR_FILE 8192 /* this can well be larger on a larger system */ + +/* + * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR. + */ +struct fsxattr { + __u32 fsx_xflags; /* xflags field value (get/set) */ + __u32 fsx_extsize; /* extsize field value (get/set)*/ + __u32 fsx_nextents; /* nextents field value (get) */ + __u32 fsx_projid; /* project identifier (get/set) */ + __u32 fsx_cowextsize; /* CoW extsize field value (get/set)*/ + unsigned char fsx_pad[8]; +}; + +/* + * Flags for the fsx_xflags field + */ +#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */ +#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */ +#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */ +#define FS_XFLAG_APPEND 0x00000010 /* all writes append */ +#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */ +#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */ +#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */ +#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */ +#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */ +#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */ +#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */ +#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */ +#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */ +#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ +#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ +#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ + +/* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. */ + +#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */ +#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */ +#define BLKRRPART _IO(0x12,95) /* re-read partition table */ +#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */ +#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */ +#define BLKRASET _IO(0x12,98) /* set read ahead for block device */ +#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */ +#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */ +#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */ +#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ +#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ +#define BLKSSZGET _IO(0x12,104)/* get block device sector size */ +#if 0 +#define BLKPG _IO(0x12,105)/* See blkpg.h */ + +/* Some people are morons. Do not use sizeof! */ + +#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */ +#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */ +/* This was here just to show that the number is taken - + probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */ +#endif +/* A jump here: 108-111 have been used for various private purposes. */ +#define BLKBSZGET _IOR(0x12,112,size_t) +#define BLKBSZSET _IOW(0x12,113,size_t) +#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */ +#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup) +#define BLKTRACESTART _IO(0x12,116) +#define BLKTRACESTOP _IO(0x12,117) +#define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) +#define BLKDISCARDZEROES _IO(0x12,124) +#define BLKSECDISCARD _IO(0x12,125) +#define BLKROTATIONAL _IO(0x12,126) +#define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) +/* + * A jump here: 130-136 are reserved for zoned block devices + * (see uapi/linux/blkzoned.h) + */ + +#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ +#define FIBMAP _IO(0x00,1) /* bmap access */ +#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ +#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ +#define FICLONE _IOW(0x94, 9, int) +#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range) +#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range) + +#define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */ + +#define FS_IOC_GETFLAGS _IOR('f', 1, long) +#define FS_IOC_SETFLAGS _IOW('f', 2, long) +#define FS_IOC_GETVERSION _IOR('v', 1, long) +#define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) +#define FS_IOC32_GETFLAGS _IOR('f', 1, int) +#define FS_IOC32_SETFLAGS _IOW('f', 2, int) +#define FS_IOC32_GETVERSION _IOR('v', 1, int) +#define FS_IOC32_SETVERSION _IOW('v', 2, int) +#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr) +#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr) +#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX]) +#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX]) +/* Returns the external filesystem UUID, the same one blkid returns */ +#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2) +/* + * Returns the path component under /sys/fs/ that refers to this filesystem; + * also /sys/kernel/debug/ for filesystems with debugfs exports + */ +#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path) + +/* + * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) + * + * Note: for historical reasons, these flags were originally used and + * defined for use by ext2/ext3, and then other file systems started + * using these flags so they wouldn't need to write their own version + * of chattr/lsattr (which was shipped as part of e2fsprogs). You + * should think twice before trying to use these flags in new + * contexts, or trying to assign these flags, since they are used both + * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are + * almost out of 32-bit flags. :-) + * + * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from + * XFS to the generic FS level interface. This uses a structure that + * has padding and hence has more room to grow, so it may be more + * appropriate for many new use cases. + * + * Please do not change these flags or interfaces before checking with + * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org. + */ +#define FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define FS_UNRM_FL 0x00000002 /* Undelete */ +#define FS_COMPR_FL 0x00000004 /* Compress file */ +#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define FS_DIRTY_FL 0x00000100 +#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ +/* End compression flags --- maybe not all used */ +#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */ +#define FS_BTREE_FL 0x00001000 /* btree format dir */ +#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ +#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */ +#define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_VERITY_FL 0x00100000 /* Verity protected inode */ +#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ +#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */ +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_DAX_FL 0x02000000 /* Inode is DAX */ +#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ +#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ +#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ +#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + + +#define SYNC_FILE_RANGE_WAIT_BEFORE 1 +#define SYNC_FILE_RANGE_WRITE 2 +#define SYNC_FILE_RANGE_WAIT_AFTER 4 +#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \ + SYNC_FILE_RANGE_WAIT_BEFORE | \ + SYNC_FILE_RANGE_WAIT_AFTER) + +/* + * Flags for preadv2/pwritev2: + */ + +typedef int __bitwise __kernel_rwf_t; + +/* high priority request, poll if possible */ +#define RWF_HIPRI ((__force __kernel_rwf_t)0x00000001) + +/* per-IO O_DSYNC */ +#define RWF_DSYNC ((__force __kernel_rwf_t)0x00000002) + +/* per-IO O_SYNC */ +#define RWF_SYNC ((__force __kernel_rwf_t)0x00000004) + +/* per-IO, return -EAGAIN if operation would block */ +#define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) + +/* per-IO O_APPEND */ +#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) + +/* per-IO negation of O_APPEND */ +#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) + +/* mask of flags supported by the kernel */ +#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ + RWF_APPEND | RWF_NOAPPEND) + +#define PROCFS_IOCTL_MAGIC 'f' + +/* Pagemap ioctl */ +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) + +/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ +#define PAGE_IS_WPALLOWED (1 << 0) +#define PAGE_IS_WRITTEN (1 << 1) +#define PAGE_IS_FILE (1 << 2) +#define PAGE_IS_PRESENT (1 << 3) +#define PAGE_IS_SWAPPED (1 << 4) +#define PAGE_IS_PFNZERO (1 << 5) +#define PAGE_IS_HUGE (1 << 6) +#define PAGE_IS_SOFT_DIRTY (1 << 7) + +/* + * struct page_region - Page region with flags + * @start: Start of the region + * @end: End of the region (exclusive) + * @categories: PAGE_IS_* category bitmask for the region + */ +struct page_region { + __u64 start; + __u64 end; + __u64 categories; +}; + +/* Flags for PAGEMAP_SCAN ioctl */ +#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */ +#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */ + +/* + * struct pm_scan_arg - Pagemap ioctl argument + * @size: Size of the structure + * @flags: Flags for the IOCTL + * @start: Starting address of the region + * @end: Ending address of the region + * @walk_end Address where the scan stopped (written by kernel). + * walk_end == end (address tags cleared) informs that the scan completed on entire range. + * @vec: Address of page_region struct array for output + * @vec_len: Length of the page_region struct array + * @max_pages: Optional limit for number of returned pages (0 = disabled) + * @category_inverted: PAGE_IS_* categories which values match if 0 instead of 1 + * @category_mask: Skip pages for which any category doesn't match + * @category_anyof_mask: Skip pages for which no category matches + * @return_mask: PAGE_IS_* categories that are to be reported in `page_region`s returned + */ +struct pm_scan_arg { + __u64 size; + __u64 flags; + __u64 start; + __u64 end; + __u64 walk_end; + __u64 vec; + __u64 vec_len; + __u64 max_pages; + __u64 category_inverted; + __u64 category_mask; + __u64 category_anyof_mask; + __u64 return_mask; +}; + +/* /proc/<pid>/maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc/<pid>/maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + +#endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 638c606dfa74..2f082b01ff22 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -41,6 +41,10 @@ */ #define XDP_UMEM_TX_SW_CSUM (1 << 1) +/* Request to reserve tx_metadata_len bytes of per-chunk metadata. + */ +#define XDP_UMEM_TX_METADATA_LEN (1 << 2) + struct sockaddr_xdp { __u16 sxdp_family; __u16 sxdp_flags; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index d03842abae57..e5af8c692dc0 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_PRE_FAULT_MEMORY 236 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1548,4 +1549,13 @@ struct kvm_create_guest_memfd { __u64 reserved[6]; }; +#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory) + +struct kvm_pre_fault_memory { + __u64 gpa; + __u64 size; + __u64 flags; + __u64 padding[5]; +}; + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index a246e11988d5..e89d00528f2f 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -17,6 +17,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h new file mode 100644 index 000000000000..35791791a879 --- /dev/null +++ b/tools/include/uapi/linux/prctl.h @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_PRCTL_H +#define _LINUX_PRCTL_H + +#include <linux/types.h> + +/* Values to pass as first argument to prctl() */ + +#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */ +#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */ + +/* Get/set current->mm->dumpable */ +#define PR_GET_DUMPABLE 3 +#define PR_SET_DUMPABLE 4 + +/* Get/set unaligned access control bits (if meaningful) */ +#define PR_GET_UNALIGN 5 +#define PR_SET_UNALIGN 6 +# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */ +# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */ + +/* Get/set whether or not to drop capabilities on setuid() away from + * uid 0 (as per security/commoncap.c) */ +#define PR_GET_KEEPCAPS 7 +#define PR_SET_KEEPCAPS 8 + +/* Get/set floating-point emulation control bits (if meaningful) */ +#define PR_GET_FPEMU 9 +#define PR_SET_FPEMU 10 +# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */ +# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */ + +/* Get/set floating-point exception mode (if meaningful) */ +#define PR_GET_FPEXC 11 +#define PR_SET_FPEXC 12 +# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */ +# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */ +# define PR_FP_EXC_OVF 0x020000 /* floating point overflow */ +# define PR_FP_EXC_UND 0x040000 /* floating point underflow */ +# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */ +# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */ +# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */ +# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */ +# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */ +# define PR_FP_EXC_PRECISE 3 /* precise exception mode */ + +/* Get/set whether we use statistical process timing or accurate timestamp + * based process timing */ +#define PR_GET_TIMING 13 +#define PR_SET_TIMING 14 +# define PR_TIMING_STATISTICAL 0 /* Normal, traditional, + statistical process timing */ +# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based + process timing */ + +#define PR_SET_NAME 15 /* Set process name */ +#define PR_GET_NAME 16 /* Get process name */ + +/* Get/set process endian */ +#define PR_GET_ENDIAN 19 +#define PR_SET_ENDIAN 20 +# define PR_ENDIAN_BIG 0 +# define PR_ENDIAN_LITTLE 1 /* True little endian mode */ +# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ + +/* Get/set process seccomp mode */ +#define PR_GET_SECCOMP 21 +#define PR_SET_SECCOMP 22 + +/* Get/set the capability bounding set (as per security/commoncap.c) */ +#define PR_CAPBSET_READ 23 +#define PR_CAPBSET_DROP 24 + +/* Get/set the process' ability to use the timestamp counter instruction */ +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ + +/* Get/set securebits (as per security/commoncap.c) */ +#define PR_GET_SECUREBITS 27 +#define PR_SET_SECUREBITS 28 + +/* + * Get/set the timerslack as used by poll/select/nanosleep + * A value of 0 means "use default" + */ +#define PR_SET_TIMERSLACK 29 +#define PR_GET_TIMERSLACK 30 + +#define PR_TASK_PERF_EVENTS_DISABLE 31 +#define PR_TASK_PERF_EVENTS_ENABLE 32 + +/* + * Set early/late kill mode for hwpoison memory corruption. + * This influences when the process gets killed on a memory corruption. + */ +#define PR_MCE_KILL 33 +# define PR_MCE_KILL_CLEAR 0 +# define PR_MCE_KILL_SET 1 + +# define PR_MCE_KILL_LATE 0 +# define PR_MCE_KILL_EARLY 1 +# define PR_MCE_KILL_DEFAULT 2 + +#define PR_MCE_KILL_GET 34 + +/* + * Tune up process memory map specifics. + */ +#define PR_SET_MM 35 +# define PR_SET_MM_START_CODE 1 +# define PR_SET_MM_END_CODE 2 +# define PR_SET_MM_START_DATA 3 +# define PR_SET_MM_END_DATA 4 +# define PR_SET_MM_START_STACK 5 +# define PR_SET_MM_START_BRK 6 +# define PR_SET_MM_BRK 7 +# define PR_SET_MM_ARG_START 8 +# define PR_SET_MM_ARG_END 9 +# define PR_SET_MM_ENV_START 10 +# define PR_SET_MM_ENV_END 11 +# define PR_SET_MM_AUXV 12 +# define PR_SET_MM_EXE_FILE 13 +# define PR_SET_MM_MAP 14 +# define PR_SET_MM_MAP_SIZE 15 + +/* + * This structure provides new memory descriptor + * map which mostly modifies /proc/pid/stat[m] + * output for a task. This mostly done in a + * sake of checkpoint/restore functionality. + */ +struct prctl_mm_map { + __u64 start_code; /* code section bounds */ + __u64 end_code; + __u64 start_data; /* data section bounds */ + __u64 end_data; + __u64 start_brk; /* heap for brk() syscall */ + __u64 brk; + __u64 start_stack; /* stack starts at */ + __u64 arg_start; /* command line arguments bounds */ + __u64 arg_end; + __u64 env_start; /* environment variables bounds */ + __u64 env_end; + __u64 *auxv; /* auxiliary vector */ + __u32 auxv_size; /* vector size */ + __u32 exe_fd; /* /proc/$pid/exe link file */ +}; + +/* + * Set specific pid that is allowed to ptrace the current task. + * A value of 0 mean "no process". + */ +#define PR_SET_PTRACER 0x59616d61 +# define PR_SET_PTRACER_ANY ((unsigned long)-1) + +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + +/* + * If no_new_privs is set, then operations that grant new privileges (i.e. + * execve) will either fail or not grant them. This affects suid/sgid, + * file capabilities, and LSMs. + * + * Operations that merely manipulate or drop existing privileges (setresuid, + * capset, etc.) will still work. Drop those privileges if you want them gone. + * + * Changing LSM security domain is considered a new privilege. So, for example, + * asking selinux for a specific new context (e.g. with runcon) will result + * in execve returning -EPERM. + * + * See Documentation/userspace-api/no_new_privs.rst for more details. + */ +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 + +#define PR_SET_THP_DISABLE 41 +#define PR_GET_THP_DISABLE 42 + +/* + * No longer implemented, but left here to ensure the numbers stay reserved: + */ +#define PR_MPX_ENABLE_MANAGEMENT 43 +#define PR_MPX_DISABLE_MANAGEMENT 44 + +#define PR_SET_FP_MODE 45 +#define PR_GET_FP_MODE 46 +# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */ +# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */ + +/* Control the ambient capability set */ +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 + +/* arm64 Scalable Vector Extension controls */ +/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */ +#define PR_SVE_SET_VL 50 /* set task vector length */ +# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SVE_GET_VL 51 /* get task vector length */ +/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */ +# define PR_SVE_VL_LEN_MASK 0xffff +# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ + +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +# define PR_SPEC_INDIRECT_BRANCH 1 +# define PR_SPEC_L1D_FLUSH 2 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) +# define PR_SPEC_DISABLE_NOEXEC (1UL << 4) + +/* Reset arm64 pointer authentication keys */ +#define PR_PAC_RESET_KEYS 54 +# define PR_PAC_APIAKEY (1UL << 0) +# define PR_PAC_APIBKEY (1UL << 1) +# define PR_PAC_APDAKEY (1UL << 2) +# define PR_PAC_APDBKEY (1UL << 3) +# define PR_PAC_APGAKEY (1UL << 4) + +/* Tagged user address controls for arm64 */ +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_NONE 0UL +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 + +/* Control reclaim behavior when allocating memory */ +#define PR_SET_IO_FLUSHER 57 +#define PR_GET_IO_FLUSHER 58 + +/* Dispatch syscalls to a userspace handler */ +#define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +/* The control values for the user space selector when dispatch is enabled */ +# define SYSCALL_DISPATCH_FILTER_ALLOW 0 +# define SYSCALL_DISPATCH_FILTER_BLOCK 1 + +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 +# define PR_SCHED_CORE_SCOPE_THREAD 0 +# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 +# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 + +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + +/* Memory deny write / execute */ +#define PR_SET_MDWE 65 +# define PR_MDWE_REFUSE_EXEC_GAIN (1UL << 0) +# define PR_MDWE_NO_INHERIT (1UL << 1) + +#define PR_GET_MDWE 66 + +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + +#define PR_GET_AUXV 0x41555856 + +#define PR_SET_MEMORY_MERGE 67 +#define PR_GET_MEMORY_MERGE 68 + +#define PR_RISCV_V_SET_CONTROL 69 +#define PR_RISCV_V_GET_CONTROL 70 +# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0 +# define PR_RISCV_V_VSTATE_CTRL_OFF 1 +# define PR_RISCV_V_VSTATE_CTRL_ON 2 +# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4) +# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3 +# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc +# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f + +#define PR_RISCV_SET_ICACHE_FLUSH_CTX 71 +# define PR_RISCV_CTX_SW_FENCEI_ON 0 +# define PR_RISCV_CTX_SW_FENCEI_OFF 1 +# define PR_RISCV_SCOPE_PER_PROCESS 0 +# define PR_RISCV_SCOPE_PER_THREAD 1 + +/* PowerPC Dynamic Execution Control Register (DEXCR) controls */ +#define PR_PPC_GET_DEXCR 72 +#define PR_PPC_SET_DEXCR 73 +/* DEXCR aspect to act on */ +# define PR_PPC_DEXCR_SBHE 0 /* Speculative branch hint enable */ +# define PR_PPC_DEXCR_IBRTPD 1 /* Indirect branch recurrent target prediction disable */ +# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address prediction disable */ +# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction enable */ +/* Action to apply / return */ +# define PR_PPC_DEXCR_CTRL_EDITABLE 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */ +# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this process */ +# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this process */ +# define PR_PPC_DEXCR_CTRL_SET_ONEXEC 0x8 /* Set the aspect on exec */ +# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ +# define PR_PPC_DEXCR_CTRL_MASK 0x1f + +#endif /* _LINUX_PRCTL_H */ diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index c3e4871967bc..2178862bb114 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -100,3 +100,23 @@ bool __bitmap_intersects(const unsigned long *bitmap1, return true; return false; } + +void __bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b6619199a706..e2cd558ca0b4 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o features.o + usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d0840ef599a..32c00db3b91b 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -116,6 +116,9 @@ struct btf { /* whether strings are already deduplicated */ bool strs_deduped; + /* whether base_btf should be freed in btf_free for this instance */ + bool owns_base; + /* BTF object FD, if loaded into kernel */ int fd; @@ -598,7 +601,7 @@ static int btf_sanity_check(const struct btf *btf) __u32 i, n = btf__type_cnt(btf); int err; - for (i = 1; i < n; i++) { + for (i = btf->start_id; i < n; i++) { t = btf_type_by_id(btf, i); err = btf_validate_type(btf, t, i); if (err) @@ -969,6 +972,8 @@ void btf__free(struct btf *btf) free(btf->raw_data); free(btf->raw_data_swapped); free(btf->type_offs); + if (btf->owns_base) + btf__free(btf->base_btf); free(btf); } @@ -1084,53 +1089,38 @@ struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) return libbpf_ptr(btf_new(data, size, base_btf)); } -static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, - struct btf_ext **btf_ext) +struct btf_elf_secs { + Elf_Data *btf_data; + Elf_Data *btf_ext_data; + Elf_Data *btf_base_data; +}; + +static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs *secs) { - Elf_Data *btf_data = NULL, *btf_ext_data = NULL; - int err = 0, fd = -1, idx = 0; - struct btf *btf = NULL; Elf_Scn *scn = NULL; - Elf *elf = NULL; + Elf_Data *data; GElf_Ehdr ehdr; size_t shstrndx; + int idx = 0; - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warn("failed to init libelf for %s\n", path); - return ERR_PTR(-LIBBPF_ERRNO__LIBELF); - } - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); - return ERR_PTR(err); - } - - err = -LIBBPF_ERRNO__FORMAT; - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - pr_warn("failed to open %s as ELF file\n", path); - goto done; - } if (!gelf_getehdr(elf, &ehdr)) { pr_warn("failed to get EHDR from %s\n", path); - goto done; + goto err; } if (elf_getshdrstrndx(elf, &shstrndx)) { pr_warn("failed to get section names section index for %s\n", path); - goto done; + goto err; } if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); - goto done; + goto err; } while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data **field; GElf_Shdr sh; char *name; @@ -1138,42 +1128,102 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, if (gelf_getshdr(scn, &sh) != &sh) { pr_warn("failed to get section(%d) header from %s\n", idx, path); - goto done; + goto err; } name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); - goto done; + goto err; } - if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = elf_getdata(scn, 0); - if (!btf_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } - continue; - } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { - btf_ext_data = elf_getdata(scn, 0); - if (!btf_ext_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } + + if (strcmp(name, BTF_ELF_SEC) == 0) + field = &secs->btf_data; + else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) + field = &secs->btf_ext_data; + else if (strcmp(name, BTF_BASE_ELF_SEC) == 0) + field = &secs->btf_base_data; + else continue; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto err; } + *field = data; } - if (!btf_data) { + return 0; + +err: + return -LIBBPF_ERRNO__FORMAT; +} + +static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, + struct btf_ext **btf_ext) +{ + struct btf_elf_secs secs = {}; + struct btf *dist_base_btf = NULL; + struct btf *btf = NULL; + int err = 0, fd = -1; + Elf *elf = NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open %s: %s\n", path, strerror(errno)); + return ERR_PTR(err); + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + pr_warn("failed to open %s as ELF file\n", path); + goto done; + } + + err = btf_find_elf_sections(elf, path, &secs); + if (err) + goto done; + + if (!secs.btf_data) { pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path); err = -ENODATA; goto done; } - btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); - err = libbpf_get_error(btf); - if (err) + + if (secs.btf_base_data) { + dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, + NULL); + if (IS_ERR(dist_base_btf)) { + err = PTR_ERR(dist_base_btf); + dist_base_btf = NULL; + goto done; + } + } + + btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, + dist_base_btf ?: base_btf); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); goto done; + } + if (dist_base_btf && base_btf) { + err = btf__relocate(btf, base_btf); + if (err) + goto done; + btf__free(dist_base_btf); + dist_base_btf = NULL; + } + + if (dist_base_btf) + btf->owns_base = true; switch (gelf_getclass(elf)) { case ELFCLASS32: @@ -1187,11 +1237,12 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, break; } - if (btf_ext && btf_ext_data) { - *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); - err = libbpf_get_error(*btf_ext); - if (err) + if (btf_ext && secs.btf_ext_data) { + *btf_ext = btf_ext__new(secs.btf_ext_data->d_buf, secs.btf_ext_data->d_size); + if (IS_ERR(*btf_ext)) { + err = PTR_ERR(*btf_ext); goto done; + } } else if (btf_ext) { *btf_ext = NULL; } @@ -1205,6 +1256,7 @@ done: if (btf_ext) btf_ext__free(*btf_ext); + btf__free(dist_base_btf); btf__free(btf); return ERR_PTR(err); @@ -1739,9 +1791,8 @@ struct btf_pipe { struct hashmap *str_off_map; /* map string offsets from src to dst */ }; -static int btf_rewrite_str(__u32 *str_off, void *ctx) +static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off) { - struct btf_pipe *p = ctx; long mapped_off; int off, err; @@ -1771,10 +1822,11 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx) return 0; } -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type) { - struct btf_pipe p = { .src = src_btf, .dst = btf }; + struct btf_field_iter it; struct btf_type *t; + __u32 *str_off; int sz, err; sz = btf_type_size(src_type); @@ -1782,35 +1834,33 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) + if (btf_ensure_modifiable(p->dst)) return libbpf_err(-ENOMEM); - t = btf_add_type_mem(btf, sz); + t = btf_add_type_mem(p->dst, sz); if (!t) return libbpf_err(-ENOMEM); memcpy(t, src_type, sz); - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return libbpf_err(err); - return btf_commit_type(btf, sz); + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(p, str_off); + if (err) + return libbpf_err(err); + } + + return btf_commit_type(p->dst, sz); } -static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) { - struct btf *btf = ctx; - - if (!*type_id) /* nothing to do for VOID references */ - return 0; + struct btf_pipe p = { .src = src_btf, .dst = btf }; - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; - return 0; + return btf_add_type(&p, src_type); } static size_t btf_dedup_identity_hash_fn(long key, void *ctx); @@ -1858,6 +1908,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) memcpy(t, src_btf->types_data, data_sz); for (i = 0; i < cnt; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + sz = btf_type_size(t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ @@ -1869,15 +1922,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) *off = t - btf->types_data; /* add, dedup, and remap strings referenced by this BTF type */ - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) goto err_out; + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(&p, str_off); + if (err) + goto err_out; + } /* remap all type IDs referenced from this BTF type */ - err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) goto err_out; + while ((type_id = btf_field_iter_next(&it))) { + if (!*type_id) /* nothing to do for VOID references */ + continue; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + } + /* go to next type data and type offset index entry */ t += sz; off++; @@ -3453,11 +3522,19 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void * int i, r; for (i = 0; i < d->btf->nr_types; i++) { + struct btf_field_iter it; struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + __u32 *str_off; - r = btf_type_visit_str_offs(t, fn, ctx); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (r) return r; + + while ((str_off = btf_field_iter_next(&it))) { + r = fn(str_off, ctx); + if (r) + return r; + } } if (!d->btf_ext) @@ -4919,10 +4996,23 @@ static int btf_dedup_remap_types(struct btf_dedup *d) for (i = 0; i < d->btf->nr_types; i++) { struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + struct btf_field_iter it; + __u32 *type_id; - r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (r) return r; + + while ((type_id = btf_field_iter_next(&it))) { + __u32 resolved_id, new_id; + + resolved_id = resolve_type_id(d, *type_id); + new_id = d->hypot_map[resolved_id]; + if (new_id > BTF_MAX_NR_TYPES) + return -EINVAL; + + *type_id = new_id; + } } if (!d->btf_ext) @@ -5003,136 +5093,6 @@ struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_bt return btf__parse_split(path, vmlinux_btf); } -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) -{ - int i, n, err; - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_FLOAT: - case BTF_KIND_ENUM: - case BTF_KIND_ENUM64: - return 0; - - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: - return visit(&t->type, ctx); - - case BTF_KIND_ARRAY: { - struct btf_array *a = btf_array(t); - - err = visit(&a->type, ctx); - err = err ?: visit(&a->index_type, ctx); - return err; - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - err = visit(&t->type, ctx); - if (err) - return err; - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_DATASEC: { - struct btf_var_secinfo *m = btf_var_secinfos(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - default: - return -EINVAL; - } -} - -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx) -{ - int i, n, err; - - err = visit(&t->name_off, ctx); - if (err) - return err; - - switch (btf_kind(t)) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM: { - struct btf_enum *m = btf_enum(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM64: { - struct btf_enum64 *m = btf_enum64(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - default: - break; - } - - return 0; -} - int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx) { const struct btf_ext_info *seg; @@ -5212,3 +5172,325 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void return 0; } + +struct btf_distill { + struct btf_pipe pipe; + int *id_map; + unsigned int split_start_id; + unsigned int split_start_str; + int diff_id; +}; + +static int btf_add_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *split_t = btf_type_by_id(dist->pipe.src, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, split_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + struct btf_type *base_t; + + if (!*id) + continue; + /* split BTF id, not needed */ + if (*id >= dist->split_start_id) + continue; + /* already added ? */ + if (dist->id_map[*id] > 0) + continue; + + /* only a subset of base BTF types should be referenced from + * split BTF; ensure nothing unexpected is referenced. + */ + base_t = btf_type_by_id(dist->pipe.src, *id); + switch (btf_kind(base_t)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + dist->id_map[*id] = *id; + break; + default: + pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n", + *id, btf_kind(base_t)); + return -EINVAL; + } + /* If a base type is used, ensure types it refers to are + * marked as used also; so for example if we find a PTR to INT + * we need both the PTR and INT. + * + * The only exception is named struct/unions, since distilled + * base BTF composite types have no members. + */ + if (btf_is_composite(base_t) && base_t->name_off) + continue; + err = btf_add_distilled_type_ids(dist, *id); + if (err) + return err; + } + return 0; +} + +static int btf_add_distilled_types(struct btf_distill *dist) +{ + bool adding_to_base = dist->pipe.dst->start_id == 1; + int id = btf__type_cnt(dist->pipe.dst); + struct btf_type *t; + int i, err = 0; + + + /* Add types for each of the required references to either distilled + * base or split BTF, depending on type characteristics. + */ + for (i = 1; i < dist->split_start_id; i++) { + const char *name; + int kind; + + if (!dist->id_map[i]) + continue; + t = btf_type_by_id(dist->pipe.src, i); + kind = btf_kind(t); + name = btf__name_by_offset(dist->pipe.src, t->name_off); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + /* Named int, float, fwd are added to base. */ + if (!adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Named struct/union are added to base as 0-vlen + * struct/union of same size. Anonymous struct/unions + * are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf_add_composite(dist->pipe.dst, kind, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* Named enum[64]s are added to base as a sized + * enum; relocation will match with appropriately-named + * and sized enum or enum64. + * + * Anonymous enums are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf__add_enum(dist->pipe.dst, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ARRAY: + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + /* All other types are added to split BTF. */ + if (adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + default: + pr_warn("unexpected kind when adding base type '%s'[%u] of kind [%u] to distilled base BTF.\n", + name, i, kind); + return -EINVAL; + + } + if (err < 0) + break; + dist->id_map[i] = id++; + } + return err; +} + +/* Split BTF ids without a mapping will be shifted downwards since distilled + * base BTF is smaller than the original base BTF. For those that have a + * mapping (either to base or updated split BTF), update the id based on + * that mapping. + */ +static int btf_update_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *t = btf_type_by_id(dist->pipe.dst, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + if (dist->id_map[*id]) + *id = dist->id_map[*id]; + else if (*id >= dist->split_start_id) + *id -= dist->diff_id; + } + return 0; +} + +/* Create updated split BTF with distilled base BTF; distilled base BTF + * consists of BTF information required to clarify the types that split + * BTF refers to, omitting unneeded details. Specifically it will contain + * base types and memberless definitions of named structs, unions and enumerated + * types. Associated reference types like pointers, arrays and anonymous + * structs, unions and enumerated types will be added to split BTF. + * Size is recorded for named struct/unions to help guide matching to the + * target base BTF during later relocation. + * + * The only case where structs, unions or enumerated types are fully represented + * is when they are anonymous; in such cases, the anonymous type is added to + * split BTF in full. + * + * We return newly-created split BTF where the split BTF refers to a newly-created + * distilled base BTF. Both must be freed separately by the caller. + */ +int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf) +{ + struct btf *new_base = NULL, *new_split = NULL; + const struct btf *old_base; + unsigned int n = btf__type_cnt(src_btf); + struct btf_distill dist = {}; + struct btf_type *t; + int i, err = 0; + + /* src BTF must be split BTF. */ + old_base = btf__base_btf(src_btf); + if (!new_base_btf || !new_split_btf || !old_base) + return libbpf_err(-EINVAL); + + new_base = btf__new_empty(); + if (!new_base) + return libbpf_err(-ENOMEM); + dist.id_map = calloc(n, sizeof(*dist.id_map)); + if (!dist.id_map) { + err = -ENOMEM; + goto done; + } + dist.pipe.src = src_btf; + dist.pipe.dst = new_base; + dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(dist.pipe.str_off_map)) { + err = -ENOMEM; + goto done; + } + dist.split_start_id = btf__type_cnt(old_base); + dist.split_start_str = old_base->hdr->str_len; + + /* Pass over src split BTF; generate the list of base BTF type ids it + * references; these will constitute our distilled BTF set to be + * distributed over base and split BTF as appropriate. + */ + for (i = src_btf->start_id; i < n; i++) { + err = btf_add_distilled_type_ids(&dist, i); + if (err < 0) + goto done; + } + /* Next add types for each of the required references to base BTF and split BTF + * in turn. + */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* Create new split BTF with distilled base BTF as its base; the final + * state is split BTF with distilled base BTF that represents enough + * about its base references to allow it to be relocated with the base + * BTF available. + */ + new_split = btf__new_empty_split(new_base); + if (!new_split) { + err = -errno; + goto done; + } + dist.pipe.dst = new_split; + /* First add all split types */ + for (i = src_btf->start_id; i < n; i++) { + t = btf_type_by_id(src_btf, i); + err = btf_add_type(&dist.pipe, t); + if (err < 0) + goto done; + } + /* Now add distilled types to split BTF that are not added to base. */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* All split BTF ids will be shifted downwards since there are less base + * BTF ids in distilled base BTF. + */ + dist.diff_id = dist.split_start_id - btf__type_cnt(new_base); + + n = btf__type_cnt(new_split); + /* Now update base/split BTF ids. */ + for (i = 1; i < n; i++) { + err = btf_update_distilled_type_ids(&dist, i); + if (err < 0) + break; + } +done: + free(dist.id_map); + hashmap__free(dist.pipe.str_off_map); + if (err) { + btf__free(new_split); + btf__free(new_base); + return libbpf_err(err); + } + *new_base_btf = new_base; + *new_split_btf = new_split; + + return 0; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf__type_cnt(base_btf); + btf->start_str_off = base_btf->hdr->str_len; +} + +int btf__relocate(struct btf *btf, const struct btf *base_btf) +{ + int err = btf_relocate(btf, base_btf, NULL); + + if (!err) + btf->owns_base = false; + return libbpf_err(err); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 8e6880d91c84..b68d216837a9 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -18,6 +18,7 @@ extern "C" { #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" +#define BTF_BASE_ELF_SEC ".BTF.base" #define MAPS_ELF_SEC ".maps" struct btf; @@ -107,6 +108,27 @@ LIBBPF_API struct btf *btf__new_empty(void); */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); +/** + * @brief **btf__distill_base()** creates new versions of the split BTF + * *src_btf* and its base BTF. The new base BTF will only contain the types + * needed to improve robustness of the split BTF to small changes in base BTF. + * When that split BTF is loaded against a (possibly changed) base, this + * distilled base BTF will help update references to that (possibly changed) + * base BTF. + * + * Both the new split and its associated new base BTF must be freed by + * the caller. + * + * If successful, 0 is returned and **new_base_btf** and **new_split_btf** + * will point at new base/split BTF. Both the new split and its associated + * new base BTF must be freed by the caller. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf); + LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); @@ -231,6 +253,20 @@ struct btf_dedup_opts { LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); +/** + * @brief **btf__relocate()** will check the split BTF *btf* for references + * to base BTF kinds, and verify those references are compatible with + * *base_btf*; if they are, *btf* is adjusted such that is re-parented to + * *base_btf* and type ids and strings are adjusted to accommodate this. + * + * If successful, 0 is returned and **btf** now has **base_btf** as its + * base. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf); + struct btf_dump; struct btf_dump_opts { diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 5dbca76b953f..894860111ddb 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1559,10 +1559,12 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, * Clang for BPF target generates func_proto with no * args as a func_proto with a single void arg (e.g., * `int (*f)(void)` vs just `int (*f)()`). We are - * going to pretend there are no args for such case. + * going to emit valid empty args (void) syntax for + * such case. Similarly and conveniently, valid + * no args case can be special-cased here as well. */ - if (vlen == 1 && p->type == 0) { - btf_dump_printf(d, ")"); + if (vlen == 0 || (vlen == 1 && p->type == 0)) { + btf_dump_printf(d, "void)"); return; } diff --git a/tools/lib/bpf/btf_iter.c b/tools/lib/bpf/btf_iter.c new file mode 100644 index 000000000000..9a6c822c2294 --- /dev/null +++ b/tools/lib/bpf/btf_iter.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> + +#define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t) + +#else +#include "btf.h" +#include "libbpf_internal.h" +#endif + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind) +{ + it->p = NULL; + it->m_idx = -1; + it->off_idx = 0; + it->vlen = 0; + + switch (iter_kind) { + case BTF_FIELD_ITER_IDS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} }; + break; + case BTF_KIND_ARRAY: + it->desc = (struct btf_field_desc) { + 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type), + sizeof(struct btf_type) + offsetof(struct btf_array, index_type)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, type)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, type)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, type)} + }; + break; + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_var_secinfo), + 1, {offsetof(struct btf_var_secinfo, type)} + }; + break; + default: + return -EINVAL; + } + break; + case BTF_FIELD_ITER_STRS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)} + }; + break; + case BTF_KIND_ENUM: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum), + 1, {offsetof(struct btf_enum, name_off)} + }; + break; + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum64), + 1, {offsetof(struct btf_enum64, name_off)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, name_off)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, name_off)} + }; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + if (it->desc.m_sz) + it->vlen = btf_vlen(t); + + it->p = t; + return 0; +} + +__u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + if (!it->p) + return NULL; + + if (it->m_idx < 0) { + if (it->off_idx < it->desc.t_off_cnt) + return it->p + it->desc.t_offs[it->off_idx++]; + /* move to per-member iteration */ + it->m_idx = 0; + it->p += sizeof(struct btf_type); + it->off_idx = 0; + } + + /* if type doesn't have members, stop */ + if (it->desc.m_sz == 0) { + it->p = NULL; + return NULL; + } + + if (it->off_idx >= it->desc.m_off_cnt) { + /* exhausted this member's fields, go to the next member */ + it->m_idx++; + it->p += it->desc.m_sz; + it->off_idx = 0; + } + + if (it->m_idx < it->vlen) + return it->p + it->desc.m_offs[it->off_idx++]; + + it->p = NULL; + return NULL; +} diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c new file mode 100644 index 000000000000..17f8b32f94a0 --- /dev/null +++ b/tools/lib/bpf/btf_relocate.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/bsearch.h> +#include <linux/btf.h> +#include <linux/sort.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> + +#define btf_type_by_id (struct btf_type *)btf_type_by_id +#define btf__type_cnt btf_nr_types +#define btf__base_btf btf_base_btf +#define btf__name_by_offset btf_name_by_offset +#define btf__str_by_offset btf_str_by_offset +#define btf_kflag btf_type_kflag + +#define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN) +#define free(ptr) kvfree(ptr) +#define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL) + +#else + +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +#endif /* __KERNEL__ */ + +struct btf; + +struct btf_relocate { + struct btf *btf; + const struct btf *base_btf; + const struct btf *dist_base_btf; + unsigned int nr_base_types; + unsigned int nr_split_types; + unsigned int nr_dist_base_types; + int dist_str_len; + int base_str_len; + __u32 *id_map; + __u32 *str_map; +}; + +/* Set temporarily in relocation id_map if distilled base struct/union is + * embedded in a split BTF struct/union; in such a case, size information must + * match between distilled base BTF and base BTF representation of type. + */ +#define BTF_IS_EMBEDDED ((__u32)-1) + +/* <name, size, id> triple used in sorting/searching distilled base BTF. */ +struct btf_name_info { + const char *name; + /* set when search requires a size match */ + bool needs_size: 1; + unsigned int size: 31; + __u32 id; +}; + +static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) + *id = r->id_map[*id]; + return 0; +} + +/* Simple string comparison used for sorting within BTF, since all distilled + * types are named. If strings match, and size is non-zero for both elements + * fall back to using size for ordering. + */ +static int cmp_btf_name_size(const void *n1, const void *n2) +{ + const struct btf_name_info *ni1 = n1; + const struct btf_name_info *ni2 = n2; + int name_diff = strcmp(ni1->name, ni2->name); + + if (!name_diff && ni1->needs_size && ni2->needs_size) + return ni2->size - ni1->size; + return name_diff; +} + +/* Binary search with a small twist; find leftmost element that matches + * so that we can then iterate through all exact matches. So for example + * searching { "a", "bb", "bb", "c" } we would always match on the + * leftmost "bb". + */ +static struct btf_name_info *search_btf_name_size(struct btf_name_info *key, + struct btf_name_info *vals, + int nelems) +{ + struct btf_name_info *ret = NULL; + int high = nelems - 1; + int low = 0; + + while (low <= high) { + int mid = (low + high)/2; + struct btf_name_info *val = &vals[mid]; + int diff = cmp_btf_name_size(key, val); + + if (diff == 0) + ret = val; + /* even if found, keep searching for leftmost match */ + if (diff <= 0) + high = mid - 1; + else + low = mid + 1; + } + return ret; +} + +/* If a member of a split BTF struct/union refers to a base BTF + * struct/union, mark that struct/union id temporarily in the id_map + * with BTF_IS_EMBEDDED. Members can be const/restrict/volatile/typedef + * reference types, but if a pointer is encountered, the type is no longer + * considered embedded. + */ +static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + if (!btf_is_composite(t)) + return 0; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) { + __u32 next_id = *id; + + while (next_id) { + t = btf_type_by_id(r->btf, next_id); + switch (btf_kind(t)) { + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: + next_id = t->type; + break; + case BTF_KIND_ARRAY: { + struct btf_array *a = btf_array(t); + + next_id = a->type; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (next_id < r->nr_dist_base_types) + r->id_map[next_id] = BTF_IS_EMBEDDED; + next_id = 0; + break; + default: + next_id = 0; + break; + } + } + } + + return 0; +} + +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate + * through base BTF looking up distilled type (using binary search) equivalents. + */ +static int btf_relocate_map_distilled_base(struct btf_relocate *r) +{ + struct btf_name_info *info, *info_end; + struct btf_type *base_t, *dist_t; + __u8 *base_name_cnt = NULL; + int err = 0; + __u32 id; + + /* generate a sort index array of name/type ids sorted by name for + * distilled base BTF to speed name-based lookups. + */ + info = calloc(r->nr_dist_base_types, sizeof(*info)); + if (!info) { + err = -ENOMEM; + goto done; + } + info_end = info + r->nr_dist_base_types; + for (id = 0; id < r->nr_dist_base_types; id++) { + dist_t = btf_type_by_id(r->dist_base_btf, id); + info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + info[id].id = id; + info[id].size = dist_t->size; + info[id].needs_size = true; + } + qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size); + + /* Mark distilled base struct/union members of split BTF structs/unions + * in id_map with BTF_IS_EMBEDDED; this signals that these types + * need to match both name and size, otherwise embedding the base + * struct/union in the split type is invalid. + */ + for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) { + err = btf_mark_embedded_composite_type_ids(r, id); + if (err) + goto done; + } + + /* Collect name counts for composite types in base BTF. If multiple + * instances of a struct/union of the same name exist, we need to use + * size to determine which to map to since name alone is ambiguous. + */ + base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt)); + if (!base_name_cnt) { + err = -ENOMEM; + goto done; + } + for (id = 1; id < r->nr_base_types; id++) { + base_t = btf_type_by_id(r->base_btf, id); + if (!btf_is_composite(base_t) || !base_t->name_off) + continue; + if (base_name_cnt[base_t->name_off] < 255) + base_name_cnt[base_t->name_off]++; + } + + /* Now search base BTF for matching distilled base BTF types. */ + for (id = 1; id < r->nr_base_types; id++) { + struct btf_name_info *dist_info, base_info = {}; + int dist_kind, base_kind; + + base_t = btf_type_by_id(r->base_btf, id); + /* distilled base consists of named types only. */ + if (!base_t->name_off) + continue; + base_kind = btf_kind(base_t); + base_info.id = id; + base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off); + switch (base_kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* These types should match both name and size */ + base_info.needs_size = true; + base_info.size = base_t->size; + break; + case BTF_KIND_FWD: + /* No size considerations for fwds. */ + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Size only needs to be used for struct/union if there + * are multiple types in base BTF with the same name. + * If there are multiple _distilled_ types with the same + * name (a very unlikely scenario), that doesn't matter + * unless corresponding _base_ types to match them are + * missing. + */ + base_info.needs_size = base_name_cnt[base_t->name_off] > 1; + base_info.size = base_t->size; + break; + default: + continue; + } + /* iterate over all matching distilled base types */ + for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types); + dist_info != NULL && dist_info < info_end && + cmp_btf_name_size(&base_info, dist_info) == 0; + dist_info++) { + if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) { + pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n", + id, dist_info->id); + err = -EINVAL; + goto done; + } + dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id); + dist_kind = btf_kind(dist_t); + + /* Validate that the found distilled type is compatible. + * Do not error out on mismatch as another match may + * occur for an identically-named type. + */ + switch (dist_kind) { + case BTF_KIND_FWD: + switch (base_kind) { + case BTF_KIND_FWD: + if (btf_kflag(dist_t) != btf_kflag(base_t)) + continue; + break; + case BTF_KIND_STRUCT: + if (btf_kflag(base_t)) + continue; + break; + case BTF_KIND_UNION: + if (!btf_kflag(base_t)) + continue; + break; + default: + continue; + } + break; + case BTF_KIND_INT: + if (dist_kind != base_kind || + btf_int_encoding(base_t) != btf_int_encoding(dist_t)) + continue; + break; + case BTF_KIND_FLOAT: + if (dist_kind != base_kind) + continue; + break; + case BTF_KIND_ENUM: + /* ENUM and ENUM64 are encoded as sized ENUM in + * distilled base BTF. + */ + if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64) + continue; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* size verification is required for embedded + * struct/unions. + */ + if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED && + base_t->size != dist_t->size) + continue; + break; + default: + continue; + } + if (r->id_map[dist_info->id] && + r->id_map[dist_info->id] != BTF_IS_EMBEDDED) { + /* we already have a match; this tells us that + * multiple base types of the same name + * have the same size, since for cases where + * multiple types have the same name we match + * on name and size. In this case, we have + * no way of determining which to relocate + * to in base BTF, so error out. + */ + pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n", + base_info.name, dist_info->id, + base_t->size, id, r->id_map[dist_info->id]); + err = -EINVAL; + goto done; + } + /* map id and name */ + r->id_map[dist_info->id] = id; + r->str_map[dist_t->name_off] = base_t->name_off; + } + } + /* ensure all distilled BTF ids now have a mapping... */ + for (id = 1; id < r->nr_dist_base_types; id++) { + const char *name; + + if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED) + continue; + dist_t = btf_type_by_id(r->dist_base_btf, id); + name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n", + name, id); + err = -EINVAL; + break; + } +done: + free(base_name_cnt); + free(info); + return err; +} + +/* distilled base should only have named int/float/enum/fwd/struct/union types. */ +static int btf_relocate_validate_distilled_base(struct btf_relocate *r) +{ + unsigned int i; + + for (i = 1; i < r->nr_dist_base_types; i++) { + struct btf_type *t = btf_type_by_id(r->dist_base_btf, i); + int kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FWD: + if (t->name_off) + break; + pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n", + i, kind); + return -EINVAL; + default: + pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n", + i, kind); + return -EINVAL; + } + } + return 0; +} + +static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *str_off; + int off, err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + + while ((str_off = btf_field_iter_next(&it))) { + if (!*str_off) + continue; + if (*str_off >= r->dist_str_len) { + *str_off += r->base_str_len - r->dist_str_len; + } else { + off = r->str_map[*str_off]; + if (!off) { + pr_warn("string '%s' [offset %u] is not mapped to base BTF", + btf__str_by_offset(r->btf, off), *str_off); + return -ENOENT; + } + *str_off = off; + } + } + return 0; +} + +/* If successful, output of relocation is updated BTF with base BTF pointing + * at base_btf, and type ids, strings adjusted accordingly. + */ +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map) +{ + unsigned int nr_types = btf__type_cnt(btf); + const struct btf_header *dist_base_hdr; + const struct btf_header *base_hdr; + struct btf_relocate r = {}; + int err = 0; + __u32 id, i; + + r.dist_base_btf = btf__base_btf(btf); + if (!base_btf || r.dist_base_btf == base_btf) + return -EINVAL; + + r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf); + r.nr_base_types = btf__type_cnt(base_btf); + r.nr_split_types = nr_types - r.nr_dist_base_types; + r.btf = btf; + r.base_btf = base_btf; + + r.id_map = calloc(nr_types, sizeof(*r.id_map)); + r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map)); + dist_base_hdr = btf_header(r.dist_base_btf); + base_hdr = btf_header(r.base_btf); + r.dist_str_len = dist_base_hdr->str_len; + r.base_str_len = base_hdr->str_len; + if (!r.id_map || !r.str_map) { + err = -ENOMEM; + goto err_out; + } + + err = btf_relocate_validate_distilled_base(&r); + if (err) + goto err_out; + + /* Split BTF ids need to be adjusted as base and distilled base + * have different numbers of types, changing the start id of split + * BTF. + */ + for (id = r.nr_dist_base_types; id < nr_types; id++) + r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types; + + /* Build a map from distilled base ids to actual base BTF ids; it is used + * to update split BTF id references. Also build a str_map mapping from + * distilled base BTF names to base BTF names. + */ + err = btf_relocate_map_distilled_base(&r); + if (err) + goto err_out; + + /* Next, rewrite type ids in split BTF, replacing split ids with updated + * ids based on number of types in base BTF, and base ids with + * relocated ids from base_btf. + */ + for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) { + err = btf_relocate_rewrite_type_id(&r, id); + if (err) + goto err_out; + } + /* String offsets now need to be updated using the str_map. */ + for (i = 0; i < r.nr_split_types; i++) { + err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types); + if (err) + goto err_out; + } + /* Finally reset base BTF to be base_btf */ + btf_set_base_btf(btf, base_btf); + + if (id_map) { + *id_map = r.id_map; + r.id_map = NULL; + } +err_out: + free(r.id_map); + free(r.str_map); + return err; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5401f2df463d..a3be6f8fac09 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -229,7 +229,30 @@ static const char * const prog_type_name[] = { static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { - if (level == LIBBPF_DEBUG) + const char *env_var = "LIBBPF_LOG_LEVEL"; + static enum libbpf_print_level min_level = LIBBPF_INFO; + static bool initialized; + + if (!initialized) { + char *verbosity; + + initialized = true; + verbosity = getenv(env_var); + if (verbosity) { + if (strcasecmp(verbosity, "warn") == 0) + min_level = LIBBPF_WARN; + else if (strcasecmp(verbosity, "debug") == 0) + min_level = LIBBPF_DEBUG; + else if (strcasecmp(verbosity, "info") == 0) + min_level = LIBBPF_INFO; + else + fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", + env_var, verbosity); + } + } + + /* if too verbose, skip logging */ + if (level > min_level) return 0; return vfprintf(stderr, format, args); @@ -549,6 +572,7 @@ struct bpf_map { bool pinned; bool reused; bool autocreate; + bool autoattach; __u64 map_extra; }; @@ -1377,6 +1401,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->def.value_size = type->size; map->def.max_entries = 1; map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; + map->autoattach = true; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -4796,6 +4821,20 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) return 0; } +int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) +{ + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + map->autoattach = autoattach; + return 0; +} + +bool bpf_map__autoattach(const struct bpf_map *map) +{ + return map->autoattach; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info; @@ -10336,7 +10375,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { - if (prev == NULL) + if (prev == NULL && obj != NULL) return obj->maps; return __bpf_map__iter(prev, obj, 1); @@ -10345,7 +10384,7 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { - if (next == NULL) { + if (next == NULL && obj != NULL) { if (!obj->nr_maps) return NULL; return obj->maps + obj->nr_maps - 1; @@ -12877,8 +12916,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) __u32 zero = 0; int err, fd; - if (!bpf_map__is_struct_ops(map)) + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); return libbpf_err_ptr(-EINVAL); + } if (map->fd < 0) { pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); @@ -13671,14 +13712,15 @@ int libbpf_num_possible_cpus(void) static int populate_skeleton_maps(const struct bpf_object *obj, struct bpf_map_skeleton *maps, - size_t map_cnt) + size_t map_cnt, size_t map_skel_sz) { int i; for (i = 0; i < map_cnt; i++) { - struct bpf_map **map = maps[i].map; - const char *name = maps[i].name; - void **mmaped = maps[i].mmaped; + struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; + struct bpf_map **map = map_skel->map; + const char *name = map_skel->name; + void **mmaped = map_skel->mmaped; *map = bpf_object__find_map_by_name(obj, name); if (!*map) { @@ -13695,13 +13737,14 @@ static int populate_skeleton_maps(const struct bpf_object *obj, static int populate_skeleton_progs(const struct bpf_object *obj, struct bpf_prog_skeleton *progs, - size_t prog_cnt) + size_t prog_cnt, size_t prog_skel_sz) { int i; for (i = 0; i < prog_cnt; i++) { - struct bpf_program **prog = progs[i].prog; - const char *name = progs[i].name; + struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; + struct bpf_program **prog = prog_skel->prog; + const char *name = prog_skel->name; *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { @@ -13742,13 +13785,13 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, } *s->obj = obj; - err = populate_skeleton_maps(obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); return libbpf_err(err); } - err = populate_skeleton_progs(obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); return libbpf_err(err); @@ -13778,20 +13821,20 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) return libbpf_err(-errno); } - err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { pr_warn("failed to populate subskeleton maps: %d\n", err); return libbpf_err(err); } - err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { pr_warn("failed to populate subskeleton maps: %d\n", err); return libbpf_err(err); } for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { - var_skel = &s->vars[var_idx]; + var_skel = (void *)s->vars + var_idx * s->var_skel_sz; map = *var_skel->map; map_type_id = bpf_map__btf_value_type_id(map); map_type = btf__type_by_id(btf, map_type_id); @@ -13838,10 +13881,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) } for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = *s->maps[i].map; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; size_t mmap_sz = bpf_map_mmap_sz(map); int prot, map_fd = map->fd; - void **mmaped = s->maps[i].mmaped; + void **mmaped = map_skel->mmaped; if (!mmaped) continue; @@ -13889,8 +13933,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) int i, err; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program *prog = *s->progs[i].prog; - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_program *prog = *prog_skel->prog; + struct bpf_link **link = prog_skel->link; if (!prog->autoload || !prog->autoattach) continue; @@ -13922,6 +13967,38 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) */ } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; + struct bpf_link **link; + + if (!map->autocreate || !map->autoattach) + continue; + + /* only struct_ops maps can be attached */ + if (!bpf_map__is_struct_ops(map)) + continue; + + /* skeleton is created with earlier version of bpftool, notify user */ + if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { + pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", + bpf_map__name(map)); + continue; + } + + link = map_skel->link; + if (*link) + continue; + + *link = bpf_map__attach_struct_ops(map); + if (!*link) { + err = -errno; + pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err); + return libbpf_err(err); + } + } + return 0; } @@ -13930,11 +14007,25 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) int i; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_link **link = prog_skel->link; bpf_link__destroy(*link); *link = NULL; } + + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_link **link = map_skel->link; + + if (link) { + bpf_link__destroy(*link); + *link = NULL; + } + } } void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) @@ -13942,8 +14033,7 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) if (!s) return; - if (s->progs) - bpf_object__detach_skeleton(s); + bpf_object__detach_skeleton(s); if (s->obj) bpf_object__close(*s->obj); free(s->maps); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c3f77d9260fe..64a6a3d323e3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -98,7 +98,10 @@ typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, /** * @brief **libbpf_set_print()** sets user-provided log callback function to - * be used for libbpf warnings and informational messages. + * be used for libbpf warnings and informational messages. If the user callback + * is not set, messages are logged to stderr by default. The verbosity of these + * messages can be controlled by setting the environment variable + * LIBBPF_LOG_LEVEL to either warn, info, or debug. * @param fn The log print function. If NULL, libbpf won't print anything. * @return Pointer to old print function. * @@ -976,6 +979,23 @@ LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); /** + * @brief **bpf_map__set_autoattach()** sets whether libbpf has to auto-attach + * map during BPF skeleton attach phase. + * @param map the BPF map instance + * @param autoattach whether to attach map during BPF skeleton attach phase + * @return 0 on success; negative error code, otherwise + */ +LIBBPF_API int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach); + +/** + * @brief **bpf_map__autoattach()** returns whether BPF map is configured to + * auto-attach during BPF skeleton attach phase. + * @param map the BPF map instance + * @return true if map is set to auto-attach during skeleton attach phase; false, otherwise + */ +LIBBPF_API bool bpf_map__autoattach(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -1669,6 +1689,7 @@ struct bpf_map_skeleton { const char *name; struct bpf_map **map; void **mmaped; + struct bpf_link **link; }; struct bpf_prog_skeleton { diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c1ce8aa3520b..8f0d9ea3b1b4 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -419,6 +419,10 @@ LIBBPF_1.4.0 { LIBBPF_1.5.0 { global: + btf__distill_base; + btf__relocate; + bpf_map__autoattach; + bpf_map__set_autoattach; bpf_program__attach_sockmap; ring__consume_n; ring_buffer__consume_n; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index a0dcfb82e455..408df59e0771 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -234,6 +234,9 @@ struct btf_type; struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); +const struct btf_header *btf_header(const struct btf *btf); +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map); static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t) { @@ -508,11 +511,33 @@ struct bpf_line_info_min { __u32 line_col; }; +enum btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_off_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_off_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx); -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, @@ -597,13 +622,9 @@ static inline int ensure_good_fd(int fd) return fd; } -static inline int sys_dup2(int oldfd, int newfd) +static inline int sys_dup3(int oldfd, int newfd, int flags) { -#ifdef __NR_dup2 - return syscall(__NR_dup2, oldfd, newfd); -#else - return syscall(__NR_dup3, oldfd, newfd, 0); -#endif + return syscall(__NR_dup3, oldfd, newfd, flags); } /* Point *fixed_fd* to the same file that *tmp_fd* points to. @@ -614,7 +635,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd) { int err; - err = sys_dup2(tmp_fd, fixed_fd); + err = sys_dup3(tmp_fd, fixed_fd, O_CLOEXEC); err = err < 0 ? -errno : 0; close(tmp_fd); /* clean up temporary FD */ return err; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 0d4be829551b..9cd3d4109788 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -957,19 +957,33 @@ static int check_btf_str_off(__u32 *str_off, void *ctx) static int linker_sanity_check_btf(struct src_obj *obj) { struct btf_type *t; - int i, n, err = 0; + int i, n, err; if (!obj->btf) return 0; n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + t = btf_type_by_id(obj->btf, i); - err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); - err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((type_id = btf_field_iter_next(&it))) { + if (*type_id >= n) + return -EINVAL; + } + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return err; + while ((str_off = btf_field_iter_next(&it))) { + if (!btf__str_by_offset(obj->btf, *str_off)) + return -EINVAL; + } } return 0; @@ -2213,10 +2227,17 @@ static int linker_fixup_btf(struct src_obj *obj) vi = btf_var_secinfos(t); for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type); - const char *var_name = btf__str_by_offset(obj->btf, vt->name_off); - int var_linkage = btf_var(vt)->linkage; + const char *var_name; + int var_linkage; Elf64_Sym *sym; + /* could be a variable or function */ + if (!btf_is_var(vt)) + continue; + + var_name = btf__str_by_offset(obj->btf, vt->name_off); + var_linkage = btf_var(vt)->linkage; + /* no need to patch up static or extern vars */ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED) continue; @@ -2234,26 +2255,10 @@ static int linker_fixup_btf(struct src_obj *obj) return 0; } -static int remap_type_id(__u32 *type_id, void *ctx) -{ - int *id_map = ctx; - int new_id = id_map[*type_id]; - - /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ - if (new_id == 0 && *type_id != 0) { - pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); - return -EINVAL; - } - - *type_id = id_map[*type_id]; - - return 0; -} - static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) { const struct btf_type *t; - int i, j, n, start_id, id; + int i, j, n, start_id, id, err; const char *name; if (!obj->btf) @@ -2324,9 +2329,25 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) n = btf__type_cnt(linker->btf); for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); + struct btf_field_iter it; + __u32 *type_id; - if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) - return -EINVAL; + err = btf_field_iter_init(&it, dst_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((type_id = btf_field_iter_next(&it))) { + int new_id = obj->btf_type_map[*type_id]; + + /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ + if (new_id == 0 && *type_id != 0) { + pr_warn("failed to find new ID mapping for original BTF type ID %u\n", + *type_id); + return -EINVAL; + } + + *type_id = obj->btf_type_map[*type_id]; + } } /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 10c067e3a8d2..69affa251fa7 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -52,7 +52,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; - u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ @@ -78,15 +77,6 @@ static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, /* Finish linking remainder of list b on to tail */ tail->next = b; do { - /* - * If the merge is highly unbalanced (e.g. the input is - * already sorted), this loop may run many iterations. - * Continue callbacks to the client even though no - * element comparison is needed, so the client's cmp() - * routine can invoke cond_resched() periodically. - */ - if (unlikely(!++count)) - cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README index db90a26dbdf4..304162743a5b 100644 --- a/tools/memory-model/Documentation/README +++ b/tools/memory-model/Documentation/README @@ -47,6 +47,10 @@ DESCRIPTION OF FILES README This file. +access-marking.txt + Guidelines for marking intentionally concurrent accesses to + shared memory. + cheatsheet.txt Quick-reference guide to the Linux-kernel memory model. diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index 65778222183e..3fbe77fd564a 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -6,7 +6,8 @@ normal accesses to shared memory, that is "normal" as in accesses that do not use read-modify-write atomic operations. It also describes how to document these accesses, both with comments and with special assertions processed by the Kernel Concurrency Sanitizer (KCSAN). This discussion -builds on an earlier LWN article [1]. +builds on an earlier LWN article [1] and Linux Foundation mentorship +session [2]. ACCESS-MARKING OPTIONS @@ -24,6 +25,11 @@ The Linux kernel provides the following access-marking options: 4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);" The various forms of atomic_set() also fit in here. +5. __data_racy, for example "int __data_racy a;" + +6. KCSAN's negative-marking assertions, ASSERT_EXCLUSIVE_ACCESS() + and ASSERT_EXCLUSIVE_WRITER(), are described in the + "ACCESS-DOCUMENTATION OPTIONS" section below. These may be used in combination, as shown in this admittedly improbable example: @@ -31,7 +37,7 @@ example: WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e)); Neither plain C-language accesses nor data_race() (#1 and #2 above) place -any sort of constraint on the compiler's choice of optimizations [2]. +any sort of constraint on the compiler's choice of optimizations [3]. In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the compiler's use of code-motion and common-subexpression optimizations. Therefore, if a given access is involved in an intentional data race, @@ -205,6 +211,23 @@ because doing otherwise prevents KCSAN from detecting violations of your code's synchronization rules. +Use of __data_racy +------------------ + +Adding the __data_racy type qualifier to the declaration of a variable +causes KCSAN to treat all accesses to that variable as if they were +enclosed by data_race(). However, __data_racy does not affect the +compiler, though one could imagine hardened kernel builds treating the +__data_racy type qualifier as if it was the volatile keyword. + +Note well that __data_racy is subject to the same pointer-declaration +rules as are other type qualifiers such as const and volatile. +For example: + + int __data_racy *p; // Pointer to data-racy data. + int *__data_racy p; // Data-racy pointer to non-data-racy data. + + ACCESS-DOCUMENTATION OPTIONS ============================ @@ -342,7 +365,7 @@ as follows: Because foo is read locklessly, all accesses are marked. The purpose of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy -concurrent lockless write. +concurrent write, whether marked or not. Lock-Protected Writes With Heuristic Lockless Reads @@ -594,5 +617,8 @@ REFERENCES [1] "Concurrency bugs should fear the big bad data-race detector (part 2)" https://lwn.net/Articles/816854/ -[2] "Who's afraid of a big bad optimizing compiler?" +[2] "The Kernel Concurrency Sanitizer" + https://www.linuxfoundation.org/webinars/the-kernel-concurrency-sanitizer + +[3] "Who's afraid of a big bad optimizing compiler?" https://lwn.net/Articles/793253/ diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 53b5a492739d..03c12efed66a 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -54,6 +54,12 @@ flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR *) empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest +(* + * In the same way, spin_is_locked() inside a critical section must always + * return True (no RU events can be in a critical section for the same lock). + *) +empty ([LKW] ; po-loc ; [RU]) \ (po-loc ; [UL] ; po-loc) as nested-is-locked + (* The final value of a spinlock should not be tested *) flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final @@ -79,42 +85,50 @@ empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks (* rfi for LF events: link each LKW to the LF events in its critical section *) let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) -(* rfe for LF events *) +(* Utility macro to convert a single pair to a single-edge relation *) +let pair-to-relation p = p ++ 0 + +(* + * If a given LF event e is outside a critical section, it cannot read + * internally but it may read from an LKW event in another thread. + * Compute the relation containing these possible edges. + *) +let possible-rfe-noncrit-lf e = (LKW * {e}) & loc & ext + +(* Compute set of sets of possible rfe edges for LF events *) let all-possible-rfe-lf = (* - * Given an LF event r, compute the possible rfe edges for that event - * (all those starting from LKW events in other threads), - * and then convert that relation to a set of single-edge relations. + * Convert the possible-rfe-noncrit-lf relation for e + * to a set of single edges *) - let possible-rfe-lf r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation ((LKW * {r}) & loc & ext) - (* Do this for each LF event r that isn't in rfi-lf *) - in map possible-rfe-lf (LF \ range(rfi-lf)) + let set-of-singleton-rfe-lf e = + map pair-to-relation (possible-rfe-noncrit-lf e) + (* Do this for each LF event e that isn't in rfi-lf *) + in map set-of-singleton-rfe-lf (LF \ range(rfi-lf)) (* Generate all rf relations for LF events *) with rfe-lf from cross(all-possible-rfe-lf) let rf-lf = rfe-lf | rfi-lf (* - * RU, i.e., spin_is_locked() returning False, is slightly different. - * We rely on the memory model to rule out cases where spin_is_locked() - * within one of the lock's critical sections returns False. + * A given RU event e may read internally from the last po-previous UL, + * or it may read from a UL event in another thread or the initial write. + * Compute the relation containing these possible edges. *) - -(* rfi for RU events: an RU may read from the last po-previous UL *) -let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc) - -(* rfe for RU events: an RU may read from an external UL or the initial write *) -let all-possible-rfe-ru = - let possible-rfe-ru r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation (((UL | IW) * {r}) & loc & ext) - in map possible-rfe-ru RU +let possible-rf-ru e = (((UL * {e}) & po-loc) \ + ([UL] ; po-loc ; [UL] ; po-loc)) | + (((UL | IW) * {e}) & loc & ext) + +(* Compute set of sets of possible rf edges for RU events *) +let all-possible-rf-ru = + (* Convert the possible-rf-ru relation for e to a set of single edges *) + let set-of-singleton-rf-ru e = + map pair-to-relation (possible-rf-ru e) + (* Do this for each RU event e *) + in map set-of-singleton-rf-ru RU (* Generate all rf relations for RU events *) -with rfe-ru from cross(all-possible-rfe-ru) -let rf-ru = rfe-ru | rfi-ru +with rf-ru from cross(all-possible-rf-ru) (* Final rf relation *) let rf = rf | rf-lf | rf-ru diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 7bb03606b9ea..15791c1c5b28 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -3,7 +3,7 @@ # include ../scripts/Makefile.include -BUILD_TARGETS=page-types slabinfo page_owner_sort +BUILD_TARGETS=page-types slabinfo page_owner_sort thp_swap_allocator_test INSTALL_TARGETS = $(BUILD_TARGETS) thpmaps LIB_DIR = ../lib/api diff --git a/tools/mm/thp_swap_allocator_test.c b/tools/mm/thp_swap_allocator_test.c new file mode 100644 index 000000000000..83afc52275a5 --- /dev/null +++ b/tools/mm/thp_swap_allocator_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * thp_swap_allocator_test + * + * The purpose of this test program is helping check if THP swpout + * can correctly get swap slots to swap out as a whole instead of + * being split. It randomly releases swap entries through madvise + * DONTNEED and swapin/out on two memory areas: a memory area for + * 64KB THP and the other area for small folios. The second memory + * can be enabled by "-s". + * Before running the program, we need to setup a zRAM or similar + * swap device by: + * echo lzo > /sys/block/zram0/comp_algorithm + * echo 64M > /sys/block/zram0/disksize + * echo never > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled + * echo always > /sys/kernel/mm/transparent_hugepage/hugepages-64kB/enabled + * mkswap /dev/zram0 + * swapon /dev/zram0 + * The expected result should be 0% anon swpout fallback ratio w/ or + * w/o "-s". + * + * Author(s): Barry Song <v-songbaohua@oppo.com> + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <errno.h> +#include <time.h> + +#define MEMSIZE_MTHP (60 * 1024 * 1024) +#define MEMSIZE_SMALLFOLIO (4 * 1024 * 1024) +#define ALIGNMENT_MTHP (64 * 1024) +#define ALIGNMENT_SMALLFOLIO (4 * 1024) +#define TOTAL_DONTNEED_MTHP (16 * 1024 * 1024) +#define TOTAL_DONTNEED_SMALLFOLIO (1 * 1024 * 1024) +#define MTHP_FOLIO_SIZE (64 * 1024) + +#define SWPOUT_PATH \ + "/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout" +#define SWPOUT_FALLBACK_PATH \ + "/sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpout_fallback" + +static void *aligned_alloc_mem(size_t size, size_t alignment) +{ + void *mem = NULL; + + if (posix_memalign(&mem, alignment, size) != 0) { + perror("posix_memalign"); + return NULL; + } + return mem; +} + +/* + * This emulates the behavior of native libc and Java heap, + * as well as process exit and munmap. It helps generate mTHP + * and ensures that iterations can proceed with mTHP, as we + * currently don't support large folios swap-in. + */ +static void random_madvise_dontneed(void *mem, size_t mem_size, + size_t align_size, size_t total_dontneed_size) +{ + size_t num_pages = total_dontneed_size / align_size; + size_t i; + size_t offset; + void *addr; + + for (i = 0; i < num_pages; ++i) { + offset = (rand() % (mem_size / align_size)) * align_size; + addr = (char *)mem + offset; + if (madvise(addr, align_size, MADV_DONTNEED) != 0) + perror("madvise dontneed"); + + memset(addr, 0x11, align_size); + } +} + +static void random_swapin(void *mem, size_t mem_size, + size_t align_size, size_t total_swapin_size) +{ + size_t num_pages = total_swapin_size / align_size; + size_t i; + size_t offset; + void *addr; + + for (i = 0; i < num_pages; ++i) { + offset = (rand() % (mem_size / align_size)) * align_size; + addr = (char *)mem + offset; + memset(addr, 0x11, align_size); + } +} + +static unsigned long read_stat(const char *path) +{ + FILE *file; + unsigned long value; + + file = fopen(path, "r"); + if (!file) { + perror("fopen"); + return 0; + } + + if (fscanf(file, "%lu", &value) != 1) { + perror("fscanf"); + fclose(file); + return 0; + } + + fclose(file); + return value; +} + +int main(int argc, char *argv[]) +{ + int use_small_folio = 0, aligned_swapin = 0; + void *mem1 = NULL, *mem2 = NULL; + int i; + + for (i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-s") == 0) + use_small_folio = 1; + else if (strcmp(argv[i], "-a") == 0) + aligned_swapin = 1; + } + + mem1 = aligned_alloc_mem(MEMSIZE_MTHP, ALIGNMENT_MTHP); + if (mem1 == NULL) { + fprintf(stderr, "Failed to allocate large folios memory\n"); + return EXIT_FAILURE; + } + + if (madvise(mem1, MEMSIZE_MTHP, MADV_HUGEPAGE) != 0) { + perror("madvise hugepage for mem1"); + free(mem1); + return EXIT_FAILURE; + } + + if (use_small_folio) { + mem2 = aligned_alloc_mem(MEMSIZE_SMALLFOLIO, ALIGNMENT_MTHP); + if (mem2 == NULL) { + fprintf(stderr, "Failed to allocate small folios memory\n"); + free(mem1); + return EXIT_FAILURE; + } + + if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_NOHUGEPAGE) != 0) { + perror("madvise nohugepage for mem2"); + free(mem1); + free(mem2); + return EXIT_FAILURE; + } + } + + /* warm-up phase to occupy the swapfile */ + memset(mem1, 0x11, MEMSIZE_MTHP); + madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT); + if (use_small_folio) { + memset(mem2, 0x11, MEMSIZE_SMALLFOLIO); + madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT); + } + + /* iterations with newly created mTHP, swap-in, and swap-out */ + for (i = 0; i < 100; ++i) { + unsigned long initial_swpout; + unsigned long initial_swpout_fallback; + unsigned long final_swpout; + unsigned long final_swpout_fallback; + unsigned long swpout_inc; + unsigned long swpout_fallback_inc; + double fallback_percentage; + + initial_swpout = read_stat(SWPOUT_PATH); + initial_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH); + + /* + * The following setup creates a 1:1 ratio of mTHP to small folios + * since large folio swap-in isn't supported yet. Once we support + * mTHP swap-in, we'll likely need to reduce MEMSIZE_MTHP and + * increase MEMSIZE_SMALLFOLIO to maintain the ratio. + */ + random_swapin(mem1, MEMSIZE_MTHP, + aligned_swapin ? ALIGNMENT_MTHP : ALIGNMENT_SMALLFOLIO, + TOTAL_DONTNEED_MTHP); + random_madvise_dontneed(mem1, MEMSIZE_MTHP, ALIGNMENT_MTHP, + TOTAL_DONTNEED_MTHP); + + if (use_small_folio) { + random_swapin(mem2, MEMSIZE_SMALLFOLIO, + ALIGNMENT_SMALLFOLIO, + TOTAL_DONTNEED_SMALLFOLIO); + } + + if (madvise(mem1, MEMSIZE_MTHP, MADV_PAGEOUT) != 0) { + perror("madvise pageout for mem1"); + free(mem1); + if (mem2 != NULL) + free(mem2); + return EXIT_FAILURE; + } + + if (use_small_folio) { + if (madvise(mem2, MEMSIZE_SMALLFOLIO, MADV_PAGEOUT) != 0) { + perror("madvise pageout for mem2"); + free(mem1); + free(mem2); + return EXIT_FAILURE; + } + } + + final_swpout = read_stat(SWPOUT_PATH); + final_swpout_fallback = read_stat(SWPOUT_FALLBACK_PATH); + + swpout_inc = final_swpout - initial_swpout; + swpout_fallback_inc = final_swpout_fallback - initial_swpout_fallback; + + fallback_percentage = (double)swpout_fallback_inc / + (swpout_fallback_inc + swpout_inc) * 100; + + printf("Iteration %d: swpout inc: %lu, swpout fallback inc: %lu, Fallback percentage: %.2f%%\n", + i + 1, swpout_inc, swpout_fallback_inc, fallback_percentage); + } + + free(mem1); + if (mem2 != NULL) + free(mem2); + + return EXIT_SUCCESS; +} diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 8e9e09d84e26..d1cdf2a8f826 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -2,9 +2,12 @@ SUBDIRS = lib generated samples -all: $(SUBDIRS) +all: $(SUBDIRS) libynl.a samples: | lib generated +libynl.a: | lib generated + @echo -e "\tAR $@" + @ar rcs $@ lib/ynl.o generated/*-user.o $(SUBDIRS): @if [ -f "$@/Makefile" ] ; then \ @@ -17,5 +20,6 @@ clean distclean: $(MAKE) -C $$dir $@; \ fi \ done + rm -f libynl.a .PHONY: all clean distclean $(SUBDIRS) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index f4e8eb79c1b8..0712b5e82eb7 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -16,7 +16,8 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) -CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) +CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) @@ -25,3 +26,4 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index dfff3ecd1cba..2887cc5de530 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -14,7 +14,9 @@ include $(wildcard *.d) all: ynl.a ynl.a: $(OBJS) - ar rcs $@ $(OBJS) + @echo -e "\tAR $@" + @ar rcs $@ $(OBJS) + clean: rm -f *.o *.d *~ rm -rf __pycache__ diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 6cf890080dc0..3c09a7bbfba5 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -45,17 +45,17 @@ struct ynl_policy_attr { enum ynl_policy_type type; unsigned int len; const char *name; - struct ynl_policy_nest *nest; + const struct ynl_policy_nest *nest; }; struct ynl_policy_nest { unsigned int max_attr; - struct ynl_policy_attr *table; + const struct ynl_policy_attr *table; }; struct ynl_parse_arg { struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; + const struct ynl_policy_nest *rsp_policy; void *data; }; @@ -79,7 +79,7 @@ static inline void *ynl_dump_obj_next(void *obj) struct ynl_dump_list_type *list; uptr -= offsetof(struct ynl_dump_list_type, data); - list = (void *)uptr; + list = (struct ynl_dump_list_type *)uptr; uptr = (unsigned long)list->next; uptr += offsetof(struct ynl_dump_list_type, data); @@ -119,7 +119,7 @@ struct ynl_dump_state { }; struct ynl_ntf_info { - struct ynl_policy_nest *policy; + const struct ynl_policy_nest *policy; ynl_parse_cb_t cb; size_t alloc_sz; void (*free)(struct ynl_ntf_base_type *ntf); @@ -139,7 +139,7 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf) { - struct nlmsghdr *nlh = buf; + struct nlmsghdr *nlh = (struct nlmsghdr *)buf; memset(nlh, 0, sizeof(*nlh)); nlh->nlmsg_len = NLMSG_HDRLEN; @@ -196,7 +196,7 @@ static inline void *ynl_attr_data(const struct nlattr *attr) static inline void *ynl_attr_data_end(const struct nlattr *attr) { - return ynl_attr_data(attr) + ynl_attr_data_len(attr); + return (char *)ynl_attr_data(attr) + ynl_attr_data_len(attr); } #define ynl_attr_for_each(attr, nlh, fixed_hdr_sz) \ @@ -228,7 +228,7 @@ ynl_attr_next(const void *end, const struct nlattr *prev) { struct nlattr *attr; - attr = (void *)((char *)prev + NLA_ALIGN(prev->nla_len)); + attr = (struct nlattr *)((char *)prev + NLA_ALIGN(prev->nla_len)); return ynl_attr_if_good(end, attr); } @@ -237,8 +237,8 @@ ynl_attr_first(const void *start, size_t len, size_t skip) { struct nlattr *attr; - attr = (void *)((char *)start + NLMSG_ALIGN(skip)); - return ynl_attr_if_good(start + len, attr); + attr = (struct nlattr *)((char *)start + NLMSG_ALIGN(skip)); + return ynl_attr_if_good((char *)start + len, attr); } static inline bool @@ -262,9 +262,9 @@ ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) struct nlattr *attr; if (__ynl_attr_put_overflow(nlh, 0)) - return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN; + return (struct nlattr *)ynl_nlmsg_end_addr(nlh) - 1; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type | NLA_F_NESTED; nlh->nlmsg_len += NLA_HDRLEN; @@ -286,7 +286,7 @@ ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type, if (__ynl_attr_put_overflow(nlh, size)) return; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; attr->nla_len = NLA_HDRLEN + size; @@ -305,10 +305,10 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) if (__ynl_attr_put_overflow(nlh, len)) return; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; - strcpy(ynl_attr_data(attr), str); + strcpy((char *)ynl_attr_data(attr), str); attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 4b9c091fc86b..fcb18a5a6d70 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -46,7 +46,7 @@ /* -- Netlink boiler plate */ static int -ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type, +ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type, char *str, int str_sz, int *n) { if (!policy) { @@ -75,8 +75,8 @@ ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type, static int ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, - struct ynl_policy_nest *policy, char *str, int str_sz, - struct ynl_policy_nest **nest_pol) + const struct ynl_policy_nest *policy, char *str, int str_sz, + const struct ynl_policy_nest **nest_pol) { unsigned int astart_off, aend_off; const struct nlattr *attr; @@ -206,7 +206,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, bad_attr[n] = '\0'; } if (tb[NLMSGERR_ATTR_MISS_TYPE]) { - struct ynl_policy_nest *nest_pol = NULL; + const struct ynl_policy_nest *nest_pol = NULL; unsigned int n, off, type; void *start, *end; int n2; @@ -296,7 +296,7 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) { - struct ynl_policy_attr *policy; + const struct ynl_policy_attr *policy; unsigned int type, len; unsigned char *data; diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index eef7c6324ed4..6cd570b283ea 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -76,7 +76,7 @@ struct ynl_sock { struct ynl_ntf_base_type **ntf_last_next; struct nlmsghdr *nlh; - struct ynl_policy_nest *req_policy; + const struct ynl_policy_nest *req_policy; unsigned char *tx_buf; unsigned char *rx_buf; unsigned char raw_buf[]; diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 35e666928119..d42c1d605969 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -743,6 +743,8 @@ class YnlFamily(SpecFamily): decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) if 'enum' in attr_spec: decoded = self._decode_enum(decoded, attr_spec) + elif attr_spec.display_hint: + decoded = self._formatted_string(decoded, attr_spec.display_hint) elif attr_spec["type"] == 'indexed-array': decoded = self._decode_array_attr(attr, attr_spec) elif attr_spec["type"] == 'bitfield32': diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index a42d62b23ee0..51529fabd517 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -59,9 +59,9 @@ class Type(SpecAttr): if 'nested-attributes' in attr: self.nested_attrs = attr['nested-attributes'] if self.nested_attrs == family.name: - self.nested_render_name = c_lower(f"{family.name}") + self.nested_render_name = c_lower(f"{family.ident_name}") else: - self.nested_render_name = c_lower(f"{family.name}_{self.nested_attrs}") + self.nested_render_name = c_lower(f"{family.ident_name}_{self.nested_attrs}") if self.nested_attrs in self.family.consts: self.nested_struct_type = 'struct ' + self.nested_render_name + '_' @@ -693,9 +693,9 @@ class Struct: self.nested = type_list is None if family.name == c_lower(space_name): - self.render_name = c_lower(family.name) + self.render_name = c_lower(family.ident_name) else: - self.render_name = c_lower(family.name + '-' + space_name) + self.render_name = c_lower(family.ident_name + '-' + space_name) self.struct_name = 'struct ' + self.render_name if self.nested and space_name in family.consts: self.struct_name += '_' @@ -761,7 +761,7 @@ class EnumEntry(SpecEnumEntry): class EnumSet(SpecEnumSet): def __init__(self, family, yaml): - self.render_name = c_lower(family.name + '-' + yaml['name']) + self.render_name = c_lower(family.ident_name + '-' + yaml['name']) if 'enum-name' in yaml: if yaml['enum-name']: @@ -777,7 +777,7 @@ class EnumSet(SpecEnumSet): else: self.user_type = 'int' - self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-") + self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") super().__init__(family, yaml) @@ -802,9 +802,9 @@ class AttrSet(SpecAttrSet): if 'name-prefix' in yaml: pfx = yaml['name-prefix'] elif self.name == family.name: - pfx = family.name + '-a-' + pfx = family.ident_name + '-a-' else: - pfx = f"{family.name}-a-{self.name}-" + pfx = f"{family.ident_name}-a-{self.name}-" self.name_prefix = c_upper(pfx) self.max_name = c_upper(self.yaml.get('attr-max-name', f"{self.name_prefix}max")) self.cnt_name = c_upper(self.yaml.get('attr-cnt-name', f"__{self.name_prefix}max")) @@ -861,7 +861,7 @@ class Operation(SpecOperation): def __init__(self, family, yaml, req_value, rsp_value): super().__init__(family, yaml, req_value, rsp_value) - self.render_name = c_lower(family.name + '_' + self.name) + self.render_name = c_lower(family.ident_name + '_' + self.name) self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \ ('dump' in yaml and 'request' in yaml['dump']) @@ -911,11 +911,11 @@ class Family(SpecFamily): if 'uapi-header' in self.yaml: self.uapi_header = self.yaml['uapi-header'] else: - self.uapi_header = f"linux/{self.name}.h" + self.uapi_header = f"linux/{self.ident_name}.h" if self.uapi_header.startswith("linux/") and self.uapi_header.endswith('.h'): self.uapi_header_name = self.uapi_header[6:-2] else: - self.uapi_header_name = self.name + self.uapi_header_name = self.ident_name def resolve(self): self.resolve_up(super()) @@ -923,7 +923,7 @@ class Family(SpecFamily): if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}: raise Exception("Codegen only supported for genetlink") - self.c_name = c_lower(self.name) + self.c_name = c_lower(self.ident_name) if 'name-prefix' in self.yaml['operations']: self.op_prefix = c_upper(self.yaml['operations']['name-prefix']) else: @@ -1507,12 +1507,12 @@ def print_dump_prototype(ri): def put_typol_fwd(cw, struct): - cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;') + cw.p(f'extern const struct ynl_policy_nest {struct.render_name}_nest;') def put_typol(cw, struct): type_max = struct.attr_set.max_name - cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') + cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') for _, arg in struct.member_list(): arg.attr_typol(cw) @@ -1520,7 +1520,7 @@ def put_typol(cw, struct): cw.block_end(line=';') cw.nl() - cw.block_start(line=f'struct ynl_policy_nest {struct.render_name}_nest =') + cw.block_start(line=f'const struct ynl_policy_nest {struct.render_name}_nest =') cw.p(f'.max_attr = {type_max},') cw.p(f'.table = {struct.render_name}_policy,') cw.block_end(line=';') @@ -2173,7 +2173,7 @@ def print_kernel_op_table_fwd(family, cw, terminate): exported = not kernel_can_gen_family_struct(family) if not terminate or exported: - cw.p(f"/* Ops table for {family.name} */") + cw.p(f"/* Ops table for {family.ident_name} */") pol_to_struct = {'global': 'genl_small_ops', 'per-op': 'genl_ops', @@ -2225,12 +2225,12 @@ def print_kernel_op_table_fwd(family, cw, terminate): continue if 'do' in op: - name = c_lower(f"{family.name}-nl-{op_name}-doit") + name = c_lower(f"{family.ident_name}-nl-{op_name}-doit") cw.write_func_prot('int', name, ['struct sk_buff *skb', 'struct genl_info *info'], suffix=';') if 'dump' in op: - name = c_lower(f"{family.name}-nl-{op_name}-dumpit") + name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit") cw.write_func_prot('int', name, ['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';') cw.nl() @@ -2256,13 +2256,13 @@ def print_kernel_op_table(family, cw): for x in op['dont-validate']])), ) for op_mode in ['do', 'dump']: if op_mode in op: - name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it") + name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it") members.append((op_mode + 'it', name)) if family.kernel_policy == 'per-op': struct = Struct(family, op['attribute-set'], type_list=op['do']['request']['attributes']) - name = c_lower(f"{family.name}-{op_name}-nl-policy") + name = c_lower(f"{family.ident_name}-{op_name}-nl-policy") members.append(('policy', name)) members.append(('maxattr', struct.attr_max_val.enum_name)) if 'flags' in op: @@ -2294,7 +2294,7 @@ def print_kernel_op_table(family, cw): members.append(('validate', ' | '.join([c_upper('genl-dont-validate-' + x) for x in dont_validate])), ) - name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it") + name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it") if 'pre' in op[op_mode]: members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre']))) members.append((op_mode + 'it', name)) @@ -2305,9 +2305,9 @@ def print_kernel_op_table(family, cw): type_list=op[op_mode]['request']['attributes']) if op.dual_policy: - name = c_lower(f"{family.name}-{op_name}-{op_mode}-nl-policy") + name = c_lower(f"{family.ident_name}-{op_name}-{op_mode}-nl-policy") else: - name = c_lower(f"{family.name}-{op_name}-nl-policy") + name = c_lower(f"{family.ident_name}-{op_name}-nl-policy") members.append(('policy', name)) members.append(('maxattr', struct.attr_max_val.enum_name)) flags = (op['flags'] if 'flags' in op else []) + ['cmd-cap-' + op_mode] @@ -2326,7 +2326,7 @@ def print_kernel_mcgrp_hdr(family, cw): cw.block_start('enum') for grp in family.mcgrps['list']: - grp_id = c_upper(f"{family.name}-nlgrp-{grp['name']},") + grp_id = c_upper(f"{family.ident_name}-nlgrp-{grp['name']},") cw.p(grp_id) cw.block_end(';') cw.nl() @@ -2339,7 +2339,7 @@ def print_kernel_mcgrp_src(family, cw): cw.block_start('static const struct genl_multicast_group ' + family.c_name + '_nl_mcgrps[] =') for grp in family.mcgrps['list']: name = grp['name'] - grp_id = c_upper(f"{family.name}-nlgrp-{name}") + grp_id = c_upper(f"{family.ident_name}-nlgrp-{name}") cw.p('[' + grp_id + '] = { "' + name + '", },') cw.block_end(';') cw.nl() @@ -2361,7 +2361,7 @@ def print_kernel_family_struct_src(family, cw): if not kernel_can_gen_family_struct(family): return - cw.block_start(f"struct genl_family {family.name}_nl_family __ro_after_init =") + cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =") cw.p('.name\t\t= ' + family.fam_key + ',') cw.p('.version\t= ' + family.ver_key + ',') cw.p('.netnsok\t= true,') @@ -2429,7 +2429,7 @@ def render_uapi(family, cw): cw.p(' */') uapi_enum_start(family, cw, const, 'name') - name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-") + name_pfx = const.get('name-prefix', f"{family.ident_name}-{const['name']}-") for entry in enum.entries.values(): suffix = ',' if entry.value_change: @@ -2451,7 +2451,7 @@ def render_uapi(family, cw): cw.nl() elif const['type'] == 'const': defines.append([c_upper(family.get('c-define-name', - f"{family.name}-{const['name']}")), + f"{family.ident_name}-{const['name']}")), const['value']]) if defines: @@ -2529,7 +2529,7 @@ def render_uapi(family, cw): defines = [] for grp in family.mcgrps['list']: name = grp['name'] - defines.append([c_upper(grp.get('c-define-name', f"{family.name}-mcgrp-{name}")), + defines.append([c_upper(grp.get('c-define-name', f"{family.ident_name}-mcgrp-{name}")), f'{name}']) cw.nl() if defines: diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 657e881d2ea4..6c56d0d726b4 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -49,7 +49,7 @@ def inline(text: str) -> str: def sanitize(text: str) -> str: """Remove newlines and multiple spaces""" # This is useful for some fields that are spread across multiple lines - return str(text).replace("\n", "").strip() + return str(text).replace("\n", " ").strip() def rst_fields(key: str, value: str, level: int = 0) -> str: @@ -156,7 +156,10 @@ def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: lines = [] for key in do_dict.keys(): lines.append(rst_paragraph(bold(key), level + 1)) - lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + if key in ['request', 'reply']: + lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + else: + lines.append(headroom(level + 2) + do_dict[key] + "\n") return "\n".join(lines) @@ -172,13 +175,13 @@ def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: """Parse operations block""" - preprocessed = ["name", "doc", "title", "do", "dump"] + preprocessed = ["name", "doc", "title", "do", "dump", "flags"] linkable = ["fixed-header", "attribute-set"] lines = [] for operation in operations: lines.append(rst_section(namespace, 'operation', operation["name"])) - lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") + lines.append(rst_paragraph(operation["doc"]) + "\n") for key in operation.keys(): if key in preprocessed: @@ -188,6 +191,8 @@ def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: if key in linkable: value = rst_ref(namespace, key, value) lines.append(rst_fields(key, value, 0)) + if 'flags' in operation: + lines.append(rst_fields('flags', rst_list_inline(operation['flags']))) if "do" in operation: lines.append(rst_paragraph(":do:", 0)) diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt index fe39c2a8ef0d..7c3ee959b63c 100644 --- a/tools/objtool/Documentation/objtool.txt +++ b/tools/objtool/Documentation/objtool.txt @@ -284,6 +284,25 @@ the objtool maintainers. Otherwise the stack frame may not get created before the call. + objtool can help with pinpointing the exact function where it happens: + + $ OBJTOOL_ARGS="--verbose" make arch/x86/kvm/ + + arch/x86/kvm/kvm.o: warning: objtool: .altinstr_replacement+0xc5: call without frame pointer save/setup + arch/x86/kvm/kvm.o: warning: objtool: em_loop.part.0+0x29: (alt) + arch/x86/kvm/kvm.o: warning: objtool: em_loop.part.0+0x0: <=== (sym) + LD [M] arch/x86/kvm/kvm-intel.o + 0000 0000000000028220 <em_loop.part.0>: + 0000 28220: 0f b6 47 61 movzbl 0x61(%rdi),%eax + 0004 28224: 3c e2 cmp $0xe2,%al + 0006 28226: 74 2c je 28254 <em_loop.part.0+0x34> + 0008 28228: 48 8b 57 10 mov 0x10(%rdi),%rdx + 000c 2822c: 83 f0 05 xor $0x5,%eax + 000f 2822f: 48 c1 e0 04 shl $0x4,%rax + 0013 28233: 25 f0 00 00 00 and $0xf0,%eax + 0018 28238: 81 e2 d5 08 00 00 and $0x8d5,%edx + 001e 2823e: 80 ce 02 or $0x2,%dh + ... 2. file.o: warning: objtool: .text+0x53: unreachable instruction diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 3a1d80a7878d..ed6bff0e01dc 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -125,8 +125,14 @@ bool arch_pc_relative_reloc(struct reloc *reloc) #define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) #define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem()) +/* + * Check the ModRM register. If there is a SIB byte then check with + * the SIB base register. But if the SIB base is 5 (i.e. CFI_BP) and + * ModRM mod is 0 then there is no base register. + */ #define rm_is(reg) (have_SIB() ? \ - sib_base == (reg) && sib_index == CFI_SP : \ + sib_base == (reg) && sib_index == CFI_SP && \ + (sib_base != CFI_BP || modrm_mod != 0) : \ modrm_rm == (reg)) #define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 4134d27c696b..4ea0f9815fda 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -9,6 +9,29 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt) { + static struct special_alt *group, *prev; + + /* + * Recompute orig_len for nested ALTERNATIVE()s. + */ + if (group && group->orig_sec == alt->orig_sec && + group->orig_off == alt->orig_off) { + + struct special_alt *iter = group; + for (;;) { + unsigned int len = max(iter->orig_len, alt->orig_len); + iter->orig_len = alt->orig_len = len; + + if (iter == prev) + break; + + iter = list_next_entry(iter, list); + } + + } else group = alt; + + prev = alt; + switch (feature) { case X86_FEATURE_SMAP: /* diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 5e21cfb7661d..387d56a7f5fb 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -144,7 +144,7 @@ static bool opts_valid(void) opts.static_call || opts.uaccess) { if (opts.dump_orc) { - ERROR("--dump can't be combined with other options"); + ERROR("--dump can't be combined with other actions"); return false; } @@ -159,7 +159,7 @@ static bool opts_valid(void) if (opts.dump_orc) return true; - ERROR("At least one command required"); + ERROR("At least one action required"); return false; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0a33d9195b7a..01237d167223 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1202,6 +1202,8 @@ static const char *uaccess_safe_builtin[] = { "__sanitizer_cov_trace_switch", /* KMSAN */ "kmsan_copy_to_user", + "kmsan_disable_current", + "kmsan_enable_current", "kmsan_report", "kmsan_unpoison_entry_regs", "kmsan_unpoison_memory", diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 7ebf29c91184..1e8141ef1b15 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -7,12 +7,16 @@ * Yes, this is unfortunate. A better solution is in the works. */ NORETURN(__fortify_panic) +NORETURN(__ia32_sys_exit) +NORETURN(__ia32_sys_exit_group) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) +NORETURN(__x64_sys_exit) +NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 91b1950f5bd8..097a69db82a0 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -84,6 +84,14 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, entry->new_len); } + orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); + if (!orig_reloc) { + WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); + return -1; + } + + reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); + if (entry->feature) { unsigned short feature; @@ -94,14 +102,6 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, arch_handle_alternative(feature, alt); } - orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); - if (!orig_reloc) { - WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); - return -1; - } - - reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); - if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); if (!new_reloc) { diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index 3992a67a87d9..c89d6bb6b184 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -4,6 +4,7 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 +HAVE_KVM_STAT_SUPPORT := 1 # # Syscall table generation for perf diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index 2386ebbf6dd4..b6b97de48233 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,5 +1,7 @@ +perf-util-y += header.o perf-util-y += perf_regs.o perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c new file mode 100644 index 000000000000..d962dff55512 --- /dev/null +++ b/tools/perf/arch/loongarch/util/header.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid(). + * + * Author: Nikita Shubin <n.shubin@yadro.com> + * Bibo Mao <maobibo@loongson.cn> + * Huacai Chen <chenhuacai@loongson.cn> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <api/fs/fs.h> +#include <errno.h> +#include "util/debug.h" +#include "util/header.h" + +/* + * Output example from /proc/cpuinfo + * CPU Family : Loongson-64bit + * Model Name : Loongson-3C5000 + * CPU Revision : 0x10 + * FPU Revision : 0x01 + */ +#define CPUINFO_MODEL "Model Name" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + unsigned long line_sz; + char *line, *model, *cpuid; + FILE *file; + + file = fopen(CPUINFO, "r"); + if (file == NULL) + return NULL; + + line = model = cpuid = NULL; + while (getline(&line, &line_sz, file) != -1) { + if (strncmp(line, CPUINFO_MODEL, strlen(CPUINFO_MODEL))) + continue; + + model = _get_field(line); + if (!model) + goto out_free; + break; + } + + if (model && (asprintf(&cpuid, "%s", model) < 0)) + cpuid = NULL; + +out_free: + fclose(file); + free(model); + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + int ret = 0; + char *cpuid = _get_cpuid(); + + if (!cpuid) + return EINVAL; + + if (sz < strlen(cpuid)) { + ret = ENOBUFS; + goto out_free; + } + + scnprintf(buffer, sz, "%s", cpuid); + +out_free: + free(cpuid); + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} diff --git a/tools/perf/arch/loongarch/util/kvm-stat.c b/tools/perf/arch/loongarch/util/kvm-stat.c new file mode 100644 index 000000000000..a7859a3a9a51 --- /dev/null +++ b/tools/perf/arch/loongarch/util/kvm-stat.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <memory.h> +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/pmus.h" + +#define LOONGARCH_EXCEPTION_INT 0 +#define LOONGARCH_EXCEPTION_PIL 1 +#define LOONGARCH_EXCEPTION_PIS 2 +#define LOONGARCH_EXCEPTION_PIF 3 +#define LOONGARCH_EXCEPTION_PME 4 +#define LOONGARCH_EXCEPTION_FPD 15 +#define LOONGARCH_EXCEPTION_SXD 16 +#define LOONGARCH_EXCEPTION_ASXD 17 +#define LOONGARCH_EXCEPTION_GSPR 22 +#define LOONGARCH_EXCEPTION_CPUCFG 100 +#define LOONGARCH_EXCEPTION_CSR 101 +#define LOONGARCH_EXCEPTION_IOCSR 102 +#define LOONGARCH_EXCEPTION_IDLE 103 +#define LOONGARCH_EXCEPTION_OTHERS 104 +#define LOONGARCH_EXCEPTION_HVC 23 + +#define loongarch_exception_type \ + {LOONGARCH_EXCEPTION_INT, "Interrupt" }, \ + {LOONGARCH_EXCEPTION_PIL, "Mem Read" }, \ + {LOONGARCH_EXCEPTION_PIS, "Mem Store" }, \ + {LOONGARCH_EXCEPTION_PIF, "Inst Fetch" }, \ + {LOONGARCH_EXCEPTION_PME, "Mem Modify" }, \ + {LOONGARCH_EXCEPTION_FPD, "FPU" }, \ + {LOONGARCH_EXCEPTION_SXD, "LSX" }, \ + {LOONGARCH_EXCEPTION_ASXD, "LASX" }, \ + {LOONGARCH_EXCEPTION_GSPR, "Privilege Error" }, \ + {LOONGARCH_EXCEPTION_HVC, "Hypercall" }, \ + {LOONGARCH_EXCEPTION_CPUCFG, "CPUCFG" }, \ + {LOONGARCH_EXCEPTION_CSR, "CSR" }, \ + {LOONGARCH_EXCEPTION_IOCSR, "IOCSR" }, \ + {LOONGARCH_EXCEPTION_IDLE, "Idle" }, \ + {LOONGARCH_EXCEPTION_OTHERS, "Others" } + +define_exit_reasons_table(loongarch_exit_reasons, loongarch_exception_type); + +const char *vcpu_id_str = "vcpu_id"; +const char *kvm_exit_reason = "reason"; +const char *kvm_entry_trace = "kvm:kvm_enter"; +const char *kvm_reenter_trace = "kvm:kvm_reenter"; +const char *kvm_exit_trace = "kvm:kvm_exit"; +const char *kvm_events_tp[] = { + "kvm:kvm_enter", + "kvm:kvm_reenter", + "kvm:kvm_exit", + "kvm:kvm_exit_gspr", + NULL, +}; + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + return exit_event_begin(evsel, sample, key); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + /* + * LoongArch kvm is different with other architectures + * + * There is kvm:kvm_reenter or kvm:kvm_enter event adjacent with + * kvm:kvm_exit event. + * kvm:kvm_enter means returning to vmm and then to guest + * kvm:kvm_reenter means returning to guest immediately + */ + return evsel__name_is(evsel, kvm_entry_trace) || evsel__name_is(evsel, kvm_reenter_trace); +} + +static void event_gspr_get_key(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + unsigned int insn; + + key->key = LOONGARCH_EXCEPTION_OTHERS; + insn = evsel__intval(evsel, sample, "inst_word"); + + switch (insn >> 24) { + case 0: + /* CPUCFG inst trap */ + if ((insn >> 10) == 0x1b) + key->key = LOONGARCH_EXCEPTION_CPUCFG; + break; + case 4: + /* CSR inst trap */ + key->key = LOONGARCH_EXCEPTION_CSR; + break; + case 6: + /* IOCSR inst trap */ + if ((insn >> 15) == 0xc90) + key->key = LOONGARCH_EXCEPTION_IOCSR; + else if ((insn >> 15) == 0xc91) + /* Idle inst trap */ + key->key = LOONGARCH_EXCEPTION_IDLE; + break; + default: + key->key = LOONGARCH_EXCEPTION_OTHERS; + break; + } +} + +static struct child_event_ops child_events[] = { + { .name = "kvm:kvm_exit_gspr", .get_key = event_gspr_get_key }, + { NULL, NULL }, +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .child_ops = child_events, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events, }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "loongarch64"; + kvm->exit_reasons = loongarch_exit_reasons; + return 0; +} diff --git a/tools/perf/arch/riscv/Makefile b/tools/perf/arch/riscv/Makefile index a8d25d005207..90c3c476a242 100644 --- a/tools/perf/arch/riscv/Makefile +++ b/tools/perf/arch/riscv/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 +HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 65ec3c66a375..f865cb0489ec 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,5 +1,6 @@ perf-util-y += perf_regs.o perf-util-y += header.o +perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/kvm-stat.c b/tools/perf/arch/riscv/util/kvm-stat.c new file mode 100644 index 000000000000..491aef449d1a --- /dev/null +++ b/tools/perf/arch/riscv/util/kvm-stat.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arch specific functions for perf kvm stat. + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#include <errno.h> +#include <memory.h> +#include "../../../util/evsel.h" +#include "../../../util/kvm-stat.h" +#include "riscv_exception_types.h" +#include "debug.h" + +define_exit_reasons_table(riscv_exit_reasons, kvm_riscv_exception_class); + +const char *vcpu_id_str = "id"; +const char *kvm_exit_reason = "scause"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + +const char *kvm_events_tp[] = { + "kvm:kvm_entry", + "kvm:kvm_exit", + NULL, +}; + +static void event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->exit_reasons = riscv_exit_reasons; +} + +static bool event_begin(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return evsel__name_is(evsel, kvm_entry_trace); +} + +static bool event_end(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (evsel__name_is(evsel, kvm_exit_trace)) { + event_get_key(evsel, sample, key); + return true; + } + return false; +} + +static struct kvm_events_ops exit_events = { + .is_begin_event = event_begin, + .is_end_event = event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { + .name = "vmexit", + .ops = &exit_events, + }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + kvm->exit_reasons_isa = "riscv64"; + return 0; +} diff --git a/tools/perf/arch/riscv/util/riscv_exception_types.h b/tools/perf/arch/riscv/util/riscv_exception_types.h new file mode 100644 index 000000000000..c49b8fa5e847 --- /dev/null +++ b/tools/perf/arch/riscv/util/riscv_exception_types.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_RISCV_EXCEPTION_TYPES_H +#define ARCH_PERF_RISCV_EXCEPTION_TYPES_H + +#define EXC_INST_MISALIGNED 0 +#define EXC_INST_ACCESS 1 +#define EXC_INST_ILLEGAL 2 +#define EXC_BREAKPOINT 3 +#define EXC_LOAD_MISALIGNED 4 +#define EXC_LOAD_ACCESS 5 +#define EXC_STORE_MISALIGNED 6 +#define EXC_STORE_ACCESS 7 +#define EXC_SYSCALL 8 +#define EXC_HYPERVISOR_SYSCALL 9 +#define EXC_SUPERVISOR_SYSCALL 10 +#define EXC_INST_PAGE_FAULT 12 +#define EXC_LOAD_PAGE_FAULT 13 +#define EXC_STORE_PAGE_FAULT 15 +#define EXC_INST_GUEST_PAGE_FAULT 20 +#define EXC_LOAD_GUEST_PAGE_FAULT 21 +#define EXC_VIRTUAL_INST_FAULT 22 +#define EXC_STORE_GUEST_PAGE_FAULT 23 + +#define EXC(x) {EXC_##x, #x } + +#define kvm_riscv_exception_class \ + EXC(INST_MISALIGNED), EXC(INST_ACCESS), EXC(INST_ILLEGAL), \ + EXC(BREAKPOINT), EXC(LOAD_MISALIGNED), EXC(LOAD_ACCESS), \ + EXC(STORE_MISALIGNED), EXC(STORE_ACCESS), EXC(SYSCALL), \ + EXC(HYPERVISOR_SYSCALL), EXC(SUPERVISOR_SYSCALL), \ + EXC(INST_PAGE_FAULT), EXC(LOAD_PAGE_FAULT), EXC(STORE_PAGE_FAULT), \ + EXC(INST_GUEST_PAGE_FAULT), EXC(LOAD_GUEST_PAGE_FAULT), \ + EXC(VIRTUAL_INST_FAULT), EXC(STORE_GUEST_PAGE_FAULT) + +#endif /* ARCH_PERF_RISCV_EXCEPTION_TYPES_H */ diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index e30fd55f8e51..cd3b480d20bd 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -26,7 +26,6 @@ static bool is_ignored_symbol(const char *name, char type) * when --all-symbols is specified so exclude them to get a * stable symbol list. */ - "kallsyms_addresses", "kallsyms_offsets", "kallsyms_relative_base", "kallsyms_num_syms", diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index b53753dee02f..6c02f401069e 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -67,6 +67,7 @@ LANGUAGES = de fr it cs pt ka bindir ?= /usr/bin sbindir ?= /usr/sbin mandir ?= /usr/man +libdir ?= /usr/lib includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -94,15 +95,6 @@ RANLIB = $(CROSS)ranlib HOSTCC = gcc MKDIR = mkdir -# 64bit library detection -include ../../scripts/Makefile.arch - -ifeq ($(IS_64_BIT), 1) -libdir ?= /usr/lib64 -else -libdir ?= /usr/lib -endif - # Now we set up the build system # @@ -332,4 +324,39 @@ uninstall: rm -f $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; -.PHONY: all utils libcpupower update-po create-gmo install-lib install-tools install-man install-gmo install uninstall clean +help: + @echo 'Building targets:' + @echo ' all - Default target. Could be omitted. Put build artifacts' + @echo ' to "O" cmdline option dir (default: current dir)' + @echo ' install - Install previously built project files from the output' + @echo ' dir defined by "O" cmdline option (default: current dir)' + @echo ' to the install dir defined by "DESTDIR" cmdline or' + @echo ' Makefile config block option (default: "")' + @echo ' install-lib - Install previously built library binary from the output' + @echo ' dir defined by "O" cmdline option (default: current dir)' + @echo ' and library headers from "lib/" for userspace to the install' + @echo ' dir defined by "DESTDIR" cmdline (default: "")' + @echo ' install-tools - Install previously built "cpupower" util from the output' + @echo ' dir defined by "O" cmdline option (default: current dir) and' + @echo ' "cpupower-completion.sh" script from the src dir to the' + @echo ' install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo ' install-man - Install man pages from the "man" src subdir to the' + @echo ' install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo ' install-gmo - Install previously built language files from the output' + @echo ' dir defined by "O" cmdline option (default: current dir)' + @echo ' to the install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo ' install-bench - Install previously built "cpufreq-bench" util files from the' + @echo ' output dir defined by "O" cmdline option (default: current dir)' + @echo ' to the install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo '' + @echo 'Cleaning targets:' + @echo ' clean - Clean build artifacts from the dir defined by "O" cmdline' + @echo ' option (default: current dir)' + @echo ' uninstall - Remove previously installed files from the dir defined by "DESTDIR"' + @echo ' cmdline or Makefile config block option (default: "")' + +.PHONY: all utils libcpupower update-po create-gmo install-lib install-tools install-man install-gmo install uninstall clean help diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README index 1c68f47663b2..2678ed81d311 100644 --- a/tools/power/cpupower/README +++ b/tools/power/cpupower/README @@ -22,16 +22,156 @@ interfaces [depending on configuration, see below]. compilation and installation ---------------------------- -make -su -make install - -should suffice on most systems. It builds libcpupower to put in -/usr/lib; cpupower, cpufreq-bench_plot.sh to put in /usr/bin; and -cpufreq-bench to put in /usr/sbin. If you want to set up the paths -differently and/or want to configure the package to your specific -needs, you need to open "Makefile" with an editor of your choice and -edit the block marked CONFIGURATION. +There are 2 output directories - one for the build output and another for +the installation of the build results, that is the utility, library, +man pages, etc... + +default directory +----------------- + +In the case of default directory, build and install process requires no +additional parameters: + +build +----- + +$ make + +The output directory for the 'make' command is the current directory and +its subdirs in the kernel tree: +tools/power/cpupower + +install +------- + +$ sudo make install + +'make install' command puts targets to default system dirs: + +----------------------------------------------------------------------- +| Installing file | System dir | +----------------------------------------------------------------------- +| libcpupower | /usr/lib | +----------------------------------------------------------------------- +| cpupower | /usr/bin | +----------------------------------------------------------------------- +| cpufreq-bench_plot.sh | /usr/bin | +----------------------------------------------------------------------- +| man pages | /usr/man | +----------------------------------------------------------------------- + +To put it in other words it makes build results available system-wide, +enabling any user to simply start using it without any additional steps + +custom directory +---------------- + +There are 2 make's command-line variables 'O' and 'DESTDIR' that setup +appropriate dirs: +'O' - build directory +'DESTDIR' - installation directory. This variable could also be setup in +the 'CONFIGURATION' block of the "Makefile" + +build +----- + +$ make O=<your_custom_build_catalog> + +Example: +$ make O=/home/hedin/prj/cpupower/build + +install +------- + +$ make O=<your_custom_build_catalog> DESTDIR=<your_custom_install_catalog> + +Example: +$ make O=/home/hedin/prj/cpupower/build DESTDIR=/home/hedin/prj/cpupower \ +> install + +Notice that both variables 'O' and 'DESTDIR' have been provided. The reason +is that the build results are saved in the custom output dir defined by 'O' +variable. So, this dir is the source for the installation step. If only +'DESTDIR' were provided then the 'install' target would assume that the +build directory is the current one, build everything there and install +from the current dir. + +The files will be installed to the following dirs: + +----------------------------------------------------------------------- +| Installing file | System dir | +----------------------------------------------------------------------- +| libcpupower | ${DESTDIR}/usr/lib | +----------------------------------------------------------------------- +| cpupower | ${DESTDIR}/usr/bin | +----------------------------------------------------------------------- +| cpufreq-bench_plot.sh | ${DESTDIR}/usr/bin | +----------------------------------------------------------------------- +| man pages | ${DESTDIR}/usr/man | +----------------------------------------------------------------------- + +If you look at the table for the default 'make' output dirs you will +notice that the only difference with the non-default case is the +${DESTDIR} prefix. So, the structure of the output dirs remains the same +regardles of the root output directory. + + +clean and uninstall +------------------- + +'clean' target is intended for cleanup the build catalog from build results +'uninstall' target is intended for removing installed files from the +installation directory + +default directory +----------------- + +This case is a straightforward one: +$ make clean +$ make uninstall + +custom directory +---------------- + +Use 'O' command line variable to remove previously built files from the +build dir: +$ make O=<your_custom_build_catalog> clean + +Example: +$ make O=/home/hedin/prj/cpupower/build clean + +Use 'DESTDIR' command line variable to uninstall previously installed files +from the given dir: +$ make DESTDIR=<your_custom_install_catalog> + +Example: +make DESTDIR=/home/hedin/prj/cpupower uninstall + + +running the tool +---------------- + +default directory +----------------- + +$ sudo cpupower + +custom directory +---------------- + +When it comes to run the utility from the custom build catalog things +become a little bit complicated as 'just run' approach doesn't work. +Assuming that the current dir is '<your_custom_install_catalog>/usr', +issuing the following command: + +$ sudo ./bin/cpupower +will produce the following error output: +./bin/cpupower: error while loading shared libraries: libcpupower.so.1: +cannot open shared object file: No such file or directory + +The issue is that binary cannot find the 'libcpupower' library. So, we +shall point to the lib dir: +sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower THANKS diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index a4b902f9e1c4..34e5894476eb 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -1,4 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 +ifeq ($(MAKELEVEL),0) +$(error This Makefile is not intended to be run standalone, but only as a part \ +of the main one in the parent dir) +endif + OUTPUT := ./ ifeq ("$(origin O)", "command line") ifneq ($(O),) diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index 8ee737eefa5c..89af019f8dc4 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -81,11 +81,6 @@ Measure idle and frequency characteristics of an arbitrary command/workload. The executable \fBcommand\fP is forked and upon its exit, statistics gathered since it was forked are displayed. .RE -.PP -\-v -.RS 4 -Increase verbosity if the binary was compiled with the DEBUG option set. -.RE .SH MONITOR DESCRIPTIONS .SS "Idle_Stats" @@ -172,9 +167,11 @@ displayed. "BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 14h Processors" https://support.amd.com/us/Processor_TechDocs/43170.pdf -"Intel® Turbo Boost Technology -in Intel® Coreâ„¢ Microarchitecture (Nehalem) Based Processors" -http://download.intel.com/design/processor/applnots/320354.pdf +"What Is Intel® Turbo Boost Technology?" +https://www.intel.com/content/www/us/en/gaming/resources/turbo-boost.html + +"Power Management - Technology Overview" +https://cdrdv2.intel.com/v1/dl/getContent/637748 "Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide" diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 075e766ff1f3..f746099b5dac 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -35,7 +35,7 @@ static unsigned int avail_monitors; static char *progname; enum operation_mode_e { list = 1, show, show_all }; -static int mode; +static enum operation_mode_e mode; static int interval = 1; static char *show_monitors_param; static struct cpupower_topology cpu_top; diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index f96f50e0c336..8a3ef94fe88f 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -77,12 +77,12 @@ class SystemValues(aslib.SystemValues): fp.close() self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S') def kernelVersion(self, msg): - m = re.match('^[Ll]inux *[Vv]ersion *(?P<v>\S*) .*', msg) + m = re.match(r'^[Ll]inux *[Vv]ersion *(?P<v>\S*) .*', msg) if m: return m.group('v') return 'unknown' def checkFtraceKernelVersion(self): - m = re.match('^(?P<x>[0-9]*)\.(?P<y>[0-9]*)\.(?P<z>[0-9]*).*', self.kernel) + m = re.match(r'^(?P<x>[0-9]*)\.(?P<y>[0-9]*)\.(?P<z>[0-9]*).*', self.kernel) if m: val = tuple(map(int, m.groups())) if val >= (4, 10, 0): @@ -324,7 +324,7 @@ def parseKernelLog(): idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(not m): continue ktime = float(m.group('ktime')) @@ -332,24 +332,24 @@ def parseKernelLog(): break msg = m.group('msg') data.dmesgtext.append(line) - if(ktime == 0.0 and re.match('^Linux version .*', msg)): + if(ktime == 0.0 and re.match(r'^Linux version .*', msg)): if(not sysvals.stamp['kernel']): sysvals.stamp['kernel'] = sysvals.kernelVersion(msg) continue - m = re.match('.* setting system clock to (?P<d>[0-9\-]*)[ A-Z](?P<t>[0-9:]*) UTC.*', msg) + m = re.match(r'.* setting system clock to (?P<d>[0-9\-]*)[ A-Z](?P<t>[0-9:]*) UTC.*', msg) if(m): bt = datetime.strptime(m.group('d')+' '+m.group('t'), '%Y-%m-%d %H:%M:%S') bt = bt - timedelta(seconds=int(ktime)) data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S') sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p') continue - m = re.match('^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg) + m = re.match(r'^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg) if(m): func = m.group('f') pid = int(m.group('p')) devtemp[func] = (ktime, pid) continue - m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg) + m = re.match(r'^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg) if(m): data.valid = True data.end = ktime @@ -359,7 +359,7 @@ def parseKernelLog(): data.newAction(phase, f, pid, start, ktime, int(r), int(t)) del devtemp[f] continue - if(re.match('^Freeing unused kernel .*', msg)): + if(re.match(r'^Freeing unused kernel .*', msg)): data.tUserMode = ktime data.dmesg['kernel']['end'] = ktime data.dmesg['user']['start'] = ktime diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 40ad221e8881..ef87e63c05c7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -86,7 +86,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.11' + version = '5.12' ansi = False rs = 0 display = '' @@ -420,11 +420,11 @@ class SystemValues: return value.format(**args) def setOutputFile(self): if self.dmesgfile != '': - m = re.match('(?P<name>.*)_dmesg\.txt.*', self.dmesgfile) + m = re.match(r'(?P<name>.*)_dmesg\.txt.*', self.dmesgfile) if(m): self.htmlfile = m.group('name')+'.html' if self.ftracefile != '': - m = re.match('(?P<name>.*)_ftrace\.txt.*', self.ftracefile) + m = re.match(r'(?P<name>.*)_ftrace\.txt.*', self.ftracefile) if(m): self.htmlfile = m.group('name')+'.html' def systemInfo(self, info): @@ -464,15 +464,15 @@ class SystemValues: if os.path.exists('/proc/cpuinfo'): with open('/proc/cpuinfo', 'r') as fp: for line in fp: - if re.match('^processor[ \t]*:[ \t]*[0-9]*', line): + if re.match(r'^processor[ \t]*:[ \t]*[0-9]*', line): self.cpucount += 1 if os.path.exists('/proc/meminfo'): with open('/proc/meminfo', 'r') as fp: for line in fp: - m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line) + m = re.match(r'^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line) if m: self.memtotal = int(m.group('sz')) - m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line) + m = re.match(r'^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line) if m: self.memfree = int(m.group('sz')) if os.path.exists('/etc/os-release'): @@ -539,7 +539,7 @@ class SystemValues: idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(m): ktime = m.group('ktime') break @@ -553,7 +553,7 @@ class SystemValues: idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(not m): continue ktime = float(m.group('ktime')) @@ -636,11 +636,11 @@ class SystemValues: # now process the args for arg in sorted(args): arglist[arg] = '' - m = re.match('.* '+arg+'=(?P<arg>.*) ', data); + m = re.match(r'.* '+arg+'=(?P<arg>.*) ', data); if m: arglist[arg] = m.group('arg') else: - m = re.match('.* '+arg+'=(?P<arg>.*)', data); + m = re.match(r'.* '+arg+'=(?P<arg>.*)', data); if m: arglist[arg] = m.group('arg') out = fmt.format(**arglist) @@ -989,7 +989,7 @@ class SystemValues: m = re.match(tp.ftrace_line_fmt, line) if(not m or 'device_pm_callback_start' not in line): continue - m = re.match('.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg')); + m = re.match(r'.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg')); if(not m): continue dev = m.group('d') @@ -999,7 +999,7 @@ class SystemValues: # now get the syspath for each target device for dirname, dirnames, filenames in os.walk('/sys/devices'): - if(re.match('.*/power', dirname) and 'async' in filenames): + if(re.match(r'.*/power', dirname) and 'async' in filenames): dev = dirname.split('/')[-2] if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)): props[dev].syspath = dirname[:-6] @@ -1143,12 +1143,12 @@ class SystemValues: elif value and os.path.exists(file): fp = open(file, 'r+') if fmt == 'radio': - m = re.match('.*\[(?P<v>.*)\].*', fp.read()) + m = re.match(r'.*\[(?P<v>.*)\].*', fp.read()) if m: self.cfgdef[file] = m.group('v') elif fmt == 'acpi': line = fp.read().strip().split('\n')[-1] - m = re.match('.* (?P<v>[0-9A-Fx]*) .*', line) + m = re.match(r'.* (?P<v>[0-9A-Fx]*) .*', line) if m: self.cfgdef[file] = m.group('v') else: @@ -1173,7 +1173,7 @@ class SystemValues: fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr out = ascii(fp.read()).strip() fp.close() - if re.match('turbostat version .*', out): + if re.match(r'turbostat version .*', out): self.vprint(out) return True return False @@ -1181,33 +1181,33 @@ class SystemValues: cmd = self.getExec('turbostat') rawout = keyline = valline = '' fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile) - fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE).stderr - for line in fp: + fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE) + for line in fp.stderr: line = ascii(line) rawout += line if keyline and valline: continue - if re.match('(?i)Avg_MHz.*', line): + if re.match(r'(?i)Avg_MHz.*', line): keyline = line.strip().split() elif keyline: valline = line.strip().split() - fp.close() + fp.wait() if not keyline or not valline or len(keyline) != len(valline): errmsg = 'unrecognized turbostat output:\n'+rawout.strip() self.vprint(errmsg) if not self.verbose: pprint(errmsg) - return '' + return (fp.returncode, '') if self.verbose: pprint(rawout.strip()) out = [] for key in keyline: idx = keyline.index(key) val = valline[idx] - if key == 'SYS%LPI' and not s0ixready and re.match('^[0\.]*$', val): + if key == 'SYS%LPI' and not s0ixready and re.match(r'^[0\.]*$', val): continue out.append('%s=%s' % (key, val)) - return '|'.join(out) + return (fp.returncode, '|'.join(out)) def netfixon(self, net='both'): cmd = self.getExec('netfix') if not cmd: @@ -1232,7 +1232,7 @@ class SystemValues: except: return '' for line in reversed(w.split('\n')): - m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', line) + m = re.match(r' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', line) if not m or (dev and dev != m.group('dev')): continue return m.group('dev') @@ -1261,14 +1261,14 @@ class SystemValues: return arr = msg.split() for j in range(len(arr)): - if re.match('^[0-9,\-\.]*$', arr[j]): - arr[j] = '[0-9,\-\.]*' + if re.match(r'^[0-9,\-\.]*$', arr[j]): + arr[j] = r'[0-9,\-\.]*' else: arr[j] = arr[j]\ - .replace('\\', '\\\\').replace(']', '\]').replace('[', '\[')\ - .replace('.', '\.').replace('+', '\+').replace('*', '\*')\ - .replace('(', '\(').replace(')', '\)').replace('}', '\}')\ - .replace('{', '\{') + .replace('\\', r'\\\\').replace(']', r'\]').replace('[', r'\[')\ + .replace('.', r'\.').replace('+', r'\+').replace('*', r'\*')\ + .replace('(', r'\(').replace(')', r'\)').replace('}', r'\}')\ + .replace('{', r'\{') mstr = ' *'.join(arr) entry = { 'line': msg, @@ -1340,7 +1340,7 @@ class SystemValues: fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout ret = 'unknown' for line in fp: - m = re.match('[\s]*Monitor is (?P<m>.*)', ascii(line)) + m = re.match(r'[\s]*Monitor is (?P<m>.*)', ascii(line)) if(m and len(m.group('m')) >= 2): out = m.group('m').lower() ret = out[3:] if out[0:2] == 'in' else out @@ -1566,7 +1566,7 @@ class Data: i += 1 if tp.stampInfo(line, sysvals): continue - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if not m: continue t = float(m.group('ktime')) @@ -1574,7 +1574,7 @@ class Data: continue dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') - if re.match('capability: warning: .*', msg): + if re.match(r'capability: warning: .*', msg): continue for err in self.errlist: if re.match(self.errlist[err], msg): @@ -1679,8 +1679,8 @@ class Data: ubiquitous = False if kprobename in dtf and 'ub' in dtf[kprobename]: ubiquitous = True - mc = re.match('\(.*\) *(?P<args>.*)', cdata) - mr = re.match('\((?P<caller>\S*).* arg1=(?P<ret>.*)', rdata) + mc = re.match(r'\(.*\) *(?P<args>.*)', cdata) + mr = re.match(r'\((?P<caller>\S*).* arg1=(?P<ret>.*)', rdata) if mc and mr: c = mr.group('caller').split('+')[0] a = mc.group('args').strip() @@ -1997,7 +1997,7 @@ class Data: list = self.dmesg[phase]['list'] mydev = '' for devname in sorted(list): - if name == devname or re.match('^%s\[(?P<num>[0-9]*)\]$' % name, devname): + if name == devname or re.match(r'^%s\[(?P<num>[0-9]*)\]$' % name, devname): mydev = devname if mydev: return list[mydev] @@ -2099,7 +2099,7 @@ class Data: for dev in sorted(list): pdev = list[dev]['par'] pid = list[dev]['pid'] - if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): + if(pid < 0 or re.match(r'[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): continue if pdev and pdev not in real and pdev not in rootlist: rootlist.append(pdev) @@ -2190,26 +2190,26 @@ class Data: if 'resume_complete' in dm: dm['resume_complete']['end'] = time def initcall_debug_call(self, line, quick=False): - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ - 'PM: *calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ + r'PM: *calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ - 'calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ + r'calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\ - '(?P<f>.*)\+ @ (?P<n>.*), parent: (?P<p>.*)', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\ + r'(?P<f>.*)\+ @ (?P<n>.*), parent: (?P<p>.*)', line) if m: return True if quick else m.group('t', 'f', 'n', 'p') return False if quick else ('', '', '', '') def initcall_debug_return(self, line, quick=False): - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: PM: '+\ - '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: PM: '+\ + r'.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ - '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ + r'.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\ - '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\ + r'(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', line) if m: return True if quick else m.group('t', 'f', 'dt') return False if quick else ('', '', '') @@ -2294,28 +2294,28 @@ class FTraceLine: if not m and not d: return # is this a trace event - if(d == 'traceevent' or re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m)): + if(d == 'traceevent' or re.match(r'^ *\/\* *(?P<msg>.*) \*\/ *$', m)): if(d == 'traceevent'): # nop format trace event msg = m else: # function_graph format trace event - em = re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m) + em = re.match(r'^ *\/\* *(?P<msg>.*) \*\/ *$', m) msg = em.group('msg') - emm = re.match('^(?P<call>.*?): (?P<msg>.*)', msg) + emm = re.match(r'^(?P<call>.*?): (?P<msg>.*)', msg) if(emm): self.name = emm.group('msg') self.type = emm.group('call') else: self.name = msg - km = re.match('^(?P<n>.*)_cal$', self.type) + km = re.match(r'^(?P<n>.*)_cal$', self.type) if km: self.fcall = True self.fkprobe = True self.type = km.group('n') return - km = re.match('^(?P<n>.*)_ret$', self.type) + km = re.match(r'^(?P<n>.*)_ret$', self.type) if km: self.freturn = True self.fkprobe = True @@ -2327,7 +2327,7 @@ class FTraceLine: if(d): self.length = float(d)/1000000 # the indentation determines the depth - match = re.match('^(?P<d> *)(?P<o>.*)$', m) + match = re.match(r'^(?P<d> *)(?P<o>.*)$', m) if(not match): return self.depth = self.getDepth(match.group('d')) @@ -2337,7 +2337,7 @@ class FTraceLine: self.freturn = True if(len(m) > 1): # includes comment with function name - match = re.match('^} *\/\* *(?P<n>.*) *\*\/$', m) + match = re.match(r'^} *\/\* *(?P<n>.*) *\*\/$', m) if(match): self.name = match.group('n').strip() # function call @@ -2345,13 +2345,13 @@ class FTraceLine: self.fcall = True # function call with children if(m[-1] == '{'): - match = re.match('^(?P<n>.*) *\(.*', m) + match = re.match(r'^(?P<n>.*) *\(.*', m) if(match): self.name = match.group('n').strip() # function call with no children (leaf) elif(m[-1] == ';'): self.freturn = True - match = re.match('^(?P<n>.*) *\(.*', m) + match = re.match(r'^(?P<n>.*) *\(.*', m) if(match): self.name = match.group('n').strip() # something else (possibly a trace marker) @@ -2385,7 +2385,7 @@ class FTraceLine: return False else: if(self.type == 'suspend_resume' and - re.match('suspend_enter\[.*\] begin', self.name)): + re.match(r'suspend_enter\[.*\] begin', self.name)): return True return False def endMarker(self): @@ -2398,7 +2398,7 @@ class FTraceLine: return False else: if(self.type == 'suspend_resume' and - re.match('thaw_processes\[.*\] end', self.name)): + re.match(r'thaw_processes\[.*\] end', self.name)): return True return False @@ -2976,30 +2976,30 @@ class Timeline: # Description: # A list of values describing the properties of these test runs class TestProps: - stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ - '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ - ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' - wififmt = '^# wifi *(?P<d>\S*) *(?P<s>\S*) *(?P<t>[0-9\.]+).*' - tstatfmt = '^# turbostat (?P<t>\S*)' - testerrfmt = '^# enter_sleep_error (?P<e>.*)' - sysinfofmt = '^# sysinfo .*' - cmdlinefmt = '^# command \| (?P<cmd>.*)' - kparamsfmt = '^# kparams \| (?P<kp>.*)' - devpropfmt = '# Device Properties: .*' - pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9,_]*): (?P<info>.*)' - tracertypefmt = '# tracer: (?P<t>.*)' - firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$' - procexecfmt = 'ps - (?P<ps>.*)$' - procmultifmt = '@(?P<n>[0-9]*)\|(?P<ps>.*)$' + stampfmt = r'# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ + r'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ + r' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' + wififmt = r'^# wifi *(?P<d>\S*) *(?P<s>\S*) *(?P<t>[0-9\.]+).*' + tstatfmt = r'^# turbostat (?P<t>\S*)' + testerrfmt = r'^# enter_sleep_error (?P<e>.*)' + sysinfofmt = r'^# sysinfo .*' + cmdlinefmt = r'^# command \| (?P<cmd>.*)' + kparamsfmt = r'^# kparams \| (?P<kp>.*)' + devpropfmt = r'# Device Properties: .*' + pinfofmt = r'# platform-(?P<val>[a-z,A-Z,0-9,_]*): (?P<info>.*)' + tracertypefmt = r'# tracer: (?P<t>.*)' + firmwarefmt = r'# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$' + procexecfmt = r'ps - (?P<ps>.*)$' + procmultifmt = r'@(?P<n>[0-9]*)\|(?P<ps>.*)$' ftrace_line_fmt_fg = \ - '^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\ - ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\ - '[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)' + r'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\ + r' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\ + r'[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)' ftrace_line_fmt_nop = \ - ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\ - '(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\ - '(?P<msg>.*)' - machinesuspend = 'machine_suspend\[.*' + r' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\ + r'(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\ + r'(?P<msg>.*)' + machinesuspend = r'machine_suspend\[.*' multiproclist = dict() multiproctime = 0.0 multiproccnt = 0 @@ -3081,14 +3081,14 @@ class TestProps: sv.hostname = data.stamp['host'] sv.suspendmode = data.stamp['mode'] if sv.suspendmode == 'freeze': - self.machinesuspend = 'timekeeping_freeze\[.*' + self.machinesuspend = r'timekeeping_freeze\[.*' else: - self.machinesuspend = 'machine_suspend\[.*' + self.machinesuspend = r'machine_suspend\[.*' if sv.suspendmode == 'command' and sv.ftracefile != '': modes = ['on', 'freeze', 'standby', 'mem', 'disk'] fp = sv.openlog(sv.ftracefile, 'r') for line in fp: - m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line) + m = re.match(r'.* machine_suspend\[(?P<mode>.*)\]', line) if m and m.group('mode') in ['1', '2', '3', '4']: sv.suspendmode = modes[int(m.group('mode'))] data.stamp['mode'] = sv.suspendmode @@ -3401,9 +3401,9 @@ def loadTraceLog(): for i in range(len(blk)): if 'SUSPEND START' in blk[i][3]: first.append(i) - elif re.match('.* timekeeping_freeze.*begin', blk[i][3]): + elif re.match(r'.* timekeeping_freeze.*begin', blk[i][3]): last.append(i) - elif re.match('.* timekeeping_freeze.*end', blk[i][3]): + elif re.match(r'.* timekeeping_freeze.*end', blk[i][3]): first.append(i) elif 'RESUME COMPLETE' in blk[i][3]: last.append(i) @@ -3514,28 +3514,28 @@ def parseTraceLog(live=False): if(t.fevent): if(t.type == 'suspend_resume'): # suspend_resume trace events have two types, begin and end - if(re.match('(?P<name>.*) begin$', t.name)): + if(re.match(r'(?P<name>.*) begin$', t.name)): isbegin = True - elif(re.match('(?P<name>.*) end$', t.name)): + elif(re.match(r'(?P<name>.*) end$', t.name)): isbegin = False else: continue if '[' in t.name: - m = re.match('(?P<name>.*)\[.*', t.name) + m = re.match(r'(?P<name>.*)\[.*', t.name) else: - m = re.match('(?P<name>.*) .*', t.name) + m = re.match(r'(?P<name>.*) .*', t.name) name = m.group('name') # ignore these events if(name.split('[')[0] in tracewatch): continue # -- phase changes -- # start of kernel suspend - if(re.match('suspend_enter\[.*', t.name)): + if(re.match(r'suspend_enter\[.*', t.name)): if(isbegin and data.tKernSus == 0): data.tKernSus = t.time continue # suspend_prepare start - elif(re.match('dpm_prepare\[.*', t.name)): + elif(re.match(r'dpm_prepare\[.*', t.name)): if isbegin and data.first_suspend_prepare: data.first_suspend_prepare = False if data.tKernSus == 0: @@ -3544,15 +3544,15 @@ def parseTraceLog(live=False): phase = data.setPhase('suspend_prepare', t.time, isbegin) continue # suspend start - elif(re.match('dpm_suspend\[.*', t.name)): + elif(re.match(r'dpm_suspend\[.*', t.name)): phase = data.setPhase('suspend', t.time, isbegin) continue # suspend_late start - elif(re.match('dpm_suspend_late\[.*', t.name)): + elif(re.match(r'dpm_suspend_late\[.*', t.name)): phase = data.setPhase('suspend_late', t.time, isbegin) continue # suspend_noirq start - elif(re.match('dpm_suspend_noirq\[.*', t.name)): + elif(re.match(r'dpm_suspend_noirq\[.*', t.name)): phase = data.setPhase('suspend_noirq', t.time, isbegin) continue # suspend_machine/resume_machine @@ -3589,19 +3589,19 @@ def parseTraceLog(live=False): data.tResumed = t.time continue # resume_noirq start - elif(re.match('dpm_resume_noirq\[.*', t.name)): + elif(re.match(r'dpm_resume_noirq\[.*', t.name)): phase = data.setPhase('resume_noirq', t.time, isbegin) continue # resume_early start - elif(re.match('dpm_resume_early\[.*', t.name)): + elif(re.match(r'dpm_resume_early\[.*', t.name)): phase = data.setPhase('resume_early', t.time, isbegin) continue # resume start - elif(re.match('dpm_resume\[.*', t.name)): + elif(re.match(r'dpm_resume\[.*', t.name)): phase = data.setPhase('resume', t.time, isbegin) continue # resume complete start - elif(re.match('dpm_complete\[.*', t.name)): + elif(re.match(r'dpm_complete\[.*', t.name)): phase = data.setPhase('resume_complete', t.time, isbegin) continue # skip trace events inside devices calls @@ -3635,7 +3635,7 @@ def parseTraceLog(live=False): elif(t.type == 'device_pm_callback_start'): if phase not in data.dmesg: continue - m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\ + m = re.match(r'(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\ t.name); if(not m): continue @@ -3650,7 +3650,7 @@ def parseTraceLog(live=False): elif(t.type == 'device_pm_callback_end'): if phase not in data.dmesg: continue - m = re.match('(?P<drv>.*) (?P<d>.*), err.*', t.name); + m = re.match(r'(?P<drv>.*) (?P<d>.*), err.*', t.name); if(not m): continue n = m.group('d') @@ -3904,24 +3904,24 @@ def loadKernelLog(): line = line[idx:] if tp.stampInfo(line, sysvals): continue - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(not m): continue msg = m.group("msg") - if re.match('PM: Syncing filesystems.*', msg) or \ - re.match('PM: suspend entry.*', msg): + if re.match(r'PM: Syncing filesystems.*', msg) or \ + re.match(r'PM: suspend entry.*', msg): if(data): testruns.append(data) data = Data(len(testruns)) tp.parseStamp(data, sysvals) if(not data): continue - m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg) + m = re.match(r'.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg) if(m): sysvals.stamp['kernel'] = m.group('k') - m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg) + m = re.match(r'PM: Preparing system for (?P<m>.*) sleep', msg) if not m: - m = re.match('PM: Preparing system for sleep \((?P<m>.*)\)', msg) + m = re.match(r'PM: Preparing system for sleep \((?P<m>.*)\)', msg) if m: sysvals.stamp['mode'] = sysvals.suspendmode = m.group('m') data.dmesgtext.append(line) @@ -3984,7 +3984,7 @@ def parseKernelLog(data): 'resume_machine': ['[PM: ]*Timekeeping suspended for.*', 'ACPI: Low-level resume complete.*', 'ACPI: resume from mwait', - 'Suspended for [0-9\.]* seconds'], + r'Suspended for [0-9\.]* seconds'], 'resume_noirq': ['PM: resume from suspend-to-idle', 'ACPI: Waking up from system sleep state.*'], 'resume_early': ['PM: noirq resume of devices complete after.*', @@ -3993,7 +3993,7 @@ def parseKernelLog(data): 'PM: early restore of devices complete after.*'], 'resume_complete': ['PM: resume of devices complete after.*', 'PM: restore of devices complete after.*'], - 'post_resume': ['.*Restarting tasks \.\.\..*'], + 'post_resume': [r'.*Restarting tasks \.\.\..*'], } # action table (expected events that occur and show up in dmesg) @@ -4021,7 +4021,7 @@ def parseKernelLog(data): actions = dict() for line in data.dmesgtext: # parse each dmesg line into the time and message - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(m): val = m.group('ktime') try: @@ -4145,26 +4145,26 @@ def parseKernelLog(data): if(a in actions and actions[a][-1]['begin'] == actions[a][-1]['end']): actions[a][-1]['end'] = ktime # now look for CPU on/off events - if(re.match('Disabling non-boot CPUs .*', msg)): + if(re.match(r'Disabling non-boot CPUs .*', msg)): # start of first cpu suspend cpu_start = ktime - elif(re.match('Enabling non-boot CPUs .*', msg)): + elif(re.match(r'Enabling non-boot CPUs .*', msg)): # start of first cpu resume cpu_start = ktime - elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) \ - or re.match('psci: CPU(?P<cpu>[0-9]*) killed.*', msg)): + elif(re.match(r'smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) \ + or re.match(r'psci: CPU(?P<cpu>[0-9]*) killed.*', msg)): # end of a cpu suspend, start of the next - m = re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) + m = re.match(r'smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) if(not m): - m = re.match('psci: CPU(?P<cpu>[0-9]*) killed.*', msg) + m = re.match(r'psci: CPU(?P<cpu>[0-9]*) killed.*', msg) cpu = 'CPU'+m.group('cpu') if(cpu not in actions): actions[cpu] = [] actions[cpu].append({'begin': cpu_start, 'end': ktime}) cpu_start = ktime - elif(re.match('CPU(?P<cpu>[0-9]*) is up', msg)): + elif(re.match(r'CPU(?P<cpu>[0-9]*) is up', msg)): # end of a cpu resume, start of the next - m = re.match('CPU(?P<cpu>[0-9]*) is up', msg) + m = re.match(r'CPU(?P<cpu>[0-9]*) is up', msg) cpu = 'CPU'+m.group('cpu') if(cpu not in actions): actions[cpu] = [] @@ -4343,7 +4343,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title): list[mode]['data'].append([data['host'], data['kernel'], data['time'], tVal[0], tVal[1], data['url'], res, data['issues'], data['sus_worst'], data['sus_worsttime'], - data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi]) + data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi, + (data['fullmode'] if 'fullmode' in data else mode)]) idx = len(list[mode]['data']) - 1 if res.startswith('fail in'): res = 'fail' @@ -4449,7 +4450,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): elif idx == iMed[i]: tHigh[i] = ' id="%smed" class=medval title="Median"' % tag html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row - html += td.format(mode) # mode + html += td.format(d[15]) # mode html += td.format(d[0]) # host html += td.format(d[1]) # kernel html += td.format(d[2]) # time @@ -5061,6 +5062,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''): def addScriptCode(hf, testruns): t0 = testruns[0].start * 1000 tMax = testruns[-1].end * 1000 + hf.write('<script type="text/javascript">\n'); # create an array in javascript memory with the device details detail = ' var devtable = [];\n' for data in testruns: @@ -5068,384 +5070,383 @@ def addScriptCode(hf, testruns): detail += ' devtable[%d] = "%s";\n' % (data.testnumber, topo) detail += ' var bounds = [%f,%f];\n' % (t0, tMax) # add the code which will manipulate the data in the browser - script_code = \ - '<script type="text/javascript">\n'+detail+\ - ' var resolution = -1;\n'\ - ' var dragval = [0, 0];\n'\ - ' function redrawTimescale(t0, tMax, tS) {\n'\ - ' var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;">\';\n'\ - ' var tTotal = tMax - t0;\n'\ - ' var list = document.getElementsByClassName("tblock");\n'\ - ' for (var i = 0; i < list.length; i++) {\n'\ - ' var timescale = list[i].getElementsByClassName("timescale")[0];\n'\ - ' var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100);\n'\ - ' var mTotal = tTotal*parseFloat(list[i].style.width)/100;\n'\ - ' var mMax = m0 + mTotal;\n'\ - ' var html = "";\n'\ - ' var divTotal = Math.floor(mTotal/tS) + 1;\n'\ - ' if(divTotal > 1000) continue;\n'\ - ' var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal;\n'\ - ' var pos = 0.0, val = 0.0;\n'\ - ' for (var j = 0; j < divTotal; j++) {\n'\ - ' var htmlline = "";\n'\ - ' var mode = list[i].id[5];\n'\ - ' if(mode == "s") {\n'\ - ' pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\ - ' val = (j-divTotal+1)*tS;\n'\ - ' if(j == divTotal - 1)\n'\ - ' htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S→</cS></div>\';\n'\ - ' else\n'\ - ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\ - ' } else {\n'\ - ' pos = 100 - (((j)*tS*100)/mTotal);\n'\ - ' val = (j)*tS;\n'\ - ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\ - ' if(j == 0)\n'\ - ' if(mode == "r")\n'\ - ' htmlline = rline+"<cS>←R</cS></div>";\n'\ - ' else\n'\ - ' htmlline = rline+"<cS>0ms</div>";\n'\ - ' }\n'\ - ' html += htmlline;\n'\ - ' }\n'\ - ' timescale.innerHTML = html;\n'\ - ' }\n'\ - ' }\n'\ - ' function zoomTimeline() {\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var zoombox = document.getElementById("dmesgzoombox");\n'\ - ' var left = zoombox.scrollLeft;\n'\ - ' var val = parseFloat(dmesg.style.width);\n'\ - ' var newval = 100;\n'\ - ' var sh = window.outerWidth / 2;\n'\ - ' if(this.id == "zoomin") {\n'\ - ' newval = val * 1.2;\n'\ - ' if(newval > 910034) newval = 910034;\n'\ - ' dmesg.style.width = newval+"%";\n'\ - ' zoombox.scrollLeft = ((left + sh) * newval / val) - sh;\n'\ - ' } else if (this.id == "zoomout") {\n'\ - ' newval = val / 1.2;\n'\ - ' if(newval < 100) newval = 100;\n'\ - ' dmesg.style.width = newval+"%";\n'\ - ' zoombox.scrollLeft = ((left + sh) * newval / val) - sh;\n'\ - ' } else {\n'\ - ' zoombox.scrollLeft = 0;\n'\ - ' dmesg.style.width = "100%";\n'\ - ' }\n'\ - ' var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1];\n'\ - ' var t0 = bounds[0];\n'\ - ' var tMax = bounds[1];\n'\ - ' var tTotal = tMax - t0;\n'\ - ' var wTotal = tTotal * 100.0 / newval;\n'\ - ' var idx = 7*window.innerWidth/1100;\n'\ - ' for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++);\n'\ - ' if(i >= tS.length) i = tS.length - 1;\n'\ - ' if(tS[i] == resolution) return;\n'\ - ' resolution = tS[i];\n'\ - ' redrawTimescale(t0, tMax, tS[i]);\n'\ - ' }\n'\ - ' function deviceName(title) {\n'\ - ' var name = title.slice(0, title.indexOf(" ("));\n'\ - ' return name;\n'\ - ' }\n'\ - ' function deviceHover() {\n'\ - ' var name = deviceName(this.title);\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' var cpu = -1;\n'\ - ' if(name.match("CPU_ON\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(7));\n'\ - ' else if(name.match("CPU_OFF\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(8));\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dname = deviceName(dev[i].title);\n'\ - ' var cname = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\ - ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\ - ' (name == dname))\n'\ - ' {\n'\ - ' dev[i].className = "hover "+cname;\n'\ - ' } else {\n'\ - ' dev[i].className = cname;\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' function deviceUnhover() {\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\ - ' }\n'\ - ' }\n'\ - ' function deviceTitle(title, total, cpu) {\n'\ - ' var prefix = "Total";\n'\ - ' if(total.length > 3) {\n'\ - ' prefix = "Average";\n'\ - ' total[1] = (total[1]+total[3])/2;\n'\ - ' total[2] = (total[2]+total[4])/2;\n'\ - ' }\n'\ - ' var devtitle = document.getElementById("devicedetailtitle");\n'\ - ' var name = deviceName(title);\n'\ - ' if(cpu >= 0) name = "CPU"+cpu;\n'\ - ' var driver = "";\n'\ - ' var tS = "<t2>(</t2>";\n'\ - ' var tR = "<t2>)</t2>";\n'\ - ' if(total[1] > 0)\n'\ - ' tS = "<t2>("+prefix+" Suspend:</t2><t0> "+total[1].toFixed(3)+" ms</t0> ";\n'\ - ' if(total[2] > 0)\n'\ - ' tR = " <t2>"+prefix+" Resume:</t2><t0> "+total[2].toFixed(3)+" ms<t2>)</t2></t0>";\n'\ - ' var s = title.indexOf("{");\n'\ - ' var e = title.indexOf("}");\n'\ - ' if((s >= 0) && (e >= 0))\n'\ - ' driver = title.slice(s+1, e) + " <t1>@</t1> ";\n'\ - ' if(total[1] > 0 && total[2] > 0)\n'\ - ' devtitle.innerHTML = "<t0>"+driver+name+"</t0> "+tS+tR;\n'\ - ' else\n'\ - ' devtitle.innerHTML = "<t0>"+title+"</t0>";\n'\ - ' return name;\n'\ - ' }\n'\ - ' function deviceDetail() {\n'\ - ' var devinfo = document.getElementById("devicedetail");\n'\ - ' devinfo.style.display = "block";\n'\ - ' var name = deviceName(this.title);\n'\ - ' var cpu = -1;\n'\ - ' if(name.match("CPU_ON\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(7));\n'\ - ' else if(name.match("CPU_OFF\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(8));\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' var idlist = [];\n'\ - ' var pdata = [[]];\n'\ - ' if(document.getElementById("devicedetail1"))\n'\ - ' pdata = [[], []];\n'\ - ' var pd = pdata[0];\n'\ - ' var total = [0.0, 0.0, 0.0];\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dname = deviceName(dev[i].title);\n'\ - ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\ - ' (name == dname))\n'\ - ' {\n'\ - ' idlist[idlist.length] = dev[i].id;\n'\ - ' var tidx = 1;\n'\ - ' if(dev[i].id[0] == "a") {\n'\ - ' pd = pdata[0];\n'\ - ' } else {\n'\ - ' if(pdata.length == 1) pdata[1] = [];\n'\ - ' if(total.length == 3) total[3]=total[4]=0.0;\n'\ - ' pd = pdata[1];\n'\ - ' tidx = 3;\n'\ - ' }\n'\ - ' var info = dev[i].title.split(" ");\n'\ - ' var pname = info[info.length-1];\n'\ - ' pd[pname] = parseFloat(info[info.length-3].slice(1));\n'\ - ' total[0] += pd[pname];\n'\ - ' if(pname.indexOf("suspend") >= 0)\n'\ - ' total[tidx] += pd[pname];\n'\ - ' else\n'\ - ' total[tidx+1] += pd[pname];\n'\ - ' }\n'\ - ' }\n'\ - ' var devname = deviceTitle(this.title, total, cpu);\n'\ - ' var left = 0.0;\n'\ - ' for (var t = 0; t < pdata.length; t++) {\n'\ - ' pd = pdata[t];\n'\ - ' devinfo = document.getElementById("devicedetail"+t);\n'\ - ' var phases = devinfo.getElementsByClassName("phaselet");\n'\ - ' for (var i = 0; i < phases.length; i++) {\n'\ - ' if(phases[i].id in pd) {\n'\ - ' var w = 100.0*pd[phases[i].id]/total[0];\n'\ - ' var fs = 32;\n'\ - ' if(w < 8) fs = 4*w | 0;\n'\ - ' var fs2 = fs*3/4;\n'\ - ' phases[i].style.width = w+"%";\n'\ - ' phases[i].style.left = left+"%";\n'\ - ' phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms";\n'\ - ' left += w;\n'\ - ' var time = "<t4 style=\\"font-size:"+fs+"px\\">"+pd[phases[i].id]+" ms<br></t4>";\n'\ - ' var pname = "<t3 style=\\"font-size:"+fs2+"px\\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>";\n'\ - ' phases[i].innerHTML = time+pname;\n'\ - ' } else {\n'\ - ' phases[i].style.width = "0%";\n'\ - ' phases[i].style.left = left+"%";\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' if(typeof devstats !== \'undefined\')\n'\ - ' callDetail(this.id, this.title);\n'\ - ' var cglist = document.getElementById("callgraphs");\n'\ - ' if(!cglist) return;\n'\ - ' var cg = cglist.getElementsByClassName("atop");\n'\ - ' if(cg.length < 10) return;\n'\ - ' for (var i = 0; i < cg.length; i++) {\n'\ - ' cgid = cg[i].id.split("x")[0]\n'\ - ' if(idlist.indexOf(cgid) >= 0) {\n'\ - ' cg[i].style.display = "block";\n'\ - ' } else {\n'\ - ' cg[i].style.display = "none";\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' function callDetail(devid, devtitle) {\n'\ - ' if(!(devid in devstats) || devstats[devid].length < 1)\n'\ - ' return;\n'\ - ' var list = devstats[devid];\n'\ - ' var tmp = devtitle.split(" ");\n'\ - ' var name = tmp[0], phase = tmp[tmp.length-1];\n'\ - ' var dd = document.getElementById(phase);\n'\ - ' var total = parseFloat(tmp[1].slice(1));\n'\ - ' var mlist = [];\n'\ - ' var maxlen = 0;\n'\ - ' var info = []\n'\ - ' for(var i in list) {\n'\ - ' if(list[i][0] == "@") {\n'\ - ' info = list[i].split("|");\n'\ - ' continue;\n'\ - ' }\n'\ - ' var tmp = list[i].split("|");\n'\ - ' var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]);\n'\ - ' var p = (t*100.0/total).toFixed(2);\n'\ - ' mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"];\n'\ - ' if(f.length > maxlen)\n'\ - ' maxlen = f.length;\n'\ - ' }\n'\ - ' var pad = 5;\n'\ - ' if(mlist.length == 0) pad = 30;\n'\ - ' var html = \'<div style="padding-top:\'+pad+\'px"><t3> <b>\'+name+\':</b>\';\n'\ - ' if(info.length > 2)\n'\ - ' html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>";\n'\ - ' if(info.length > 3)\n'\ - ' html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>";\n'\ - ' if(info.length > 4)\n'\ - ' html += ", return=<b>"+info[4]+"</b>";\n'\ - ' html += "</t3></div>";\n'\ - ' if(mlist.length > 0) {\n'\ - ' html += \'<table class=fstat style="padding-top:\'+(maxlen*5)+\'px;"><tr><th>Function</th>\';\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td class=vt>"+mlist[i][0]+"</td>";\n'\ - ' html += "</tr><tr><th>Calls</th>";\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td>"+mlist[i][1]+"</td>";\n'\ - ' html += "</tr><tr><th>Time(ms)</th>";\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td>"+mlist[i][2]+"</td>";\n'\ - ' html += "</tr><tr><th>Percent</th>";\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td>"+mlist[i][3]+"</td>";\n'\ - ' html += "</tr></table>";\n'\ - ' }\n'\ - ' dd.innerHTML = html;\n'\ - ' var height = (maxlen*5)+100;\n'\ - ' dd.style.height = height+"px";\n'\ - ' document.getElementById("devicedetail").style.height = height+"px";\n'\ - ' }\n'\ - ' function callSelect() {\n'\ - ' var cglist = document.getElementById("callgraphs");\n'\ - ' if(!cglist) return;\n'\ - ' var cg = cglist.getElementsByClassName("atop");\n'\ - ' for (var i = 0; i < cg.length; i++) {\n'\ - ' if(this.id == cg[i].id) {\n'\ - ' cg[i].style.display = "block";\n'\ - ' } else {\n'\ - ' cg[i].style.display = "none";\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' function devListWindow(e) {\n'\ - ' var win = window.open();\n'\ - ' var html = "<title>"+e.target.innerHTML+"</title>"+\n'\ - ' "<style type=\\"text/css\\">"+\n'\ - ' " ul {list-style-type:circle;padding-left:10px;margin-left:10px;}"+\n'\ - ' "</style>"\n'\ - ' var dt = devtable[0];\n'\ - ' if(e.target.id != "devlist1")\n'\ - ' dt = devtable[1];\n'\ - ' win.document.write(html+dt);\n'\ - ' }\n'\ - ' function errWindow() {\n'\ - ' var range = this.id.split("_");\n'\ - ' var idx1 = parseInt(range[0]);\n'\ - ' var idx2 = parseInt(range[1]);\n'\ - ' var win = window.open();\n'\ - ' var log = document.getElementById("dmesglog");\n'\ - ' var title = "<title>dmesg log</title>";\n'\ - ' var text = log.innerHTML.split("\\n");\n'\ - ' var html = "";\n'\ - ' for(var i = 0; i < text.length; i++) {\n'\ - ' if(i == idx1) {\n'\ - ' html += "<e id=target>"+text[i]+"</e>\\n";\n'\ - ' } else if(i > idx1 && i <= idx2) {\n'\ - ' html += "<e>"+text[i]+"</e>\\n";\n'\ - ' } else {\n'\ - ' html += text[i]+"\\n";\n'\ - ' }\n'\ - ' }\n'\ - ' win.document.write("<style>e{color:red}</style>"+title+"<pre>"+html+"</pre>");\n'\ - ' win.location.hash = "#target";\n'\ - ' win.document.close();\n'\ - ' }\n'\ - ' function logWindow(e) {\n'\ - ' var name = e.target.id.slice(4);\n'\ - ' var win = window.open();\n'\ - ' var log = document.getElementById(name+"log");\n'\ - ' var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>";\n'\ - ' win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\ - ' win.document.close();\n'\ - ' }\n'\ - ' function onMouseDown(e) {\n'\ - ' dragval[0] = e.clientX;\n'\ - ' dragval[1] = document.getElementById("dmesgzoombox").scrollLeft;\n'\ - ' document.onmousemove = onMouseMove;\n'\ - ' }\n'\ - ' function onMouseMove(e) {\n'\ - ' var zoombox = document.getElementById("dmesgzoombox");\n'\ - ' zoombox.scrollLeft = dragval[1] + dragval[0] - e.clientX;\n'\ - ' }\n'\ - ' function onMouseUp(e) {\n'\ - ' document.onmousemove = null;\n'\ - ' }\n'\ - ' function onKeyPress(e) {\n'\ - ' var c = e.charCode;\n'\ - ' if(c != 42 && c != 43 && c != 45) return;\n'\ - ' var click = document.createEvent("Events");\n'\ - ' click.initEvent("click", true, false);\n'\ - ' if(c == 43) \n'\ - ' document.getElementById("zoomin").dispatchEvent(click);\n'\ - ' else if(c == 45)\n'\ - ' document.getElementById("zoomout").dispatchEvent(click);\n'\ - ' else if(c == 42)\n'\ - ' document.getElementById("zoomdef").dispatchEvent(click);\n'\ - ' }\n'\ - ' window.addEventListener("resize", function () {zoomTimeline();});\n'\ - ' window.addEventListener("load", function () {\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' dmesg.style.width = "100%"\n'\ - ' dmesg.onmousedown = onMouseDown;\n'\ - ' document.onmouseup = onMouseUp;\n'\ - ' document.onkeypress = onKeyPress;\n'\ - ' document.getElementById("zoomin").onclick = zoomTimeline;\n'\ - ' document.getElementById("zoomout").onclick = zoomTimeline;\n'\ - ' document.getElementById("zoomdef").onclick = zoomTimeline;\n'\ - ' var list = document.getElementsByClassName("err");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = errWindow;\n'\ - ' var list = document.getElementsByClassName("logbtn");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = logWindow;\n'\ - ' list = document.getElementsByClassName("devlist");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = devListWindow;\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dev[i].onclick = deviceDetail;\n'\ - ' dev[i].onmouseover = deviceHover;\n'\ - ' dev[i].onmouseout = deviceUnhover;\n'\ - ' }\n'\ - ' var dev = dmesg.getElementsByClassName("srccall");\n'\ - ' for (var i = 0; i < dev.length; i++)\n'\ - ' dev[i].onclick = callSelect;\n'\ - ' zoomTimeline();\n'\ - ' });\n'\ - '</script>\n' + hf.write(detail); + script_code = r""" var resolution = -1; + var dragval = [0, 0]; + function redrawTimescale(t0, tMax, tS) { + var rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">'; + var tTotal = tMax - t0; + var list = document.getElementsByClassName("tblock"); + for (var i = 0; i < list.length; i++) { + var timescale = list[i].getElementsByClassName("timescale")[0]; + var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100); + var mTotal = tTotal*parseFloat(list[i].style.width)/100; + var mMax = m0 + mTotal; + var html = ""; + var divTotal = Math.floor(mTotal/tS) + 1; + if(divTotal > 1000) continue; + var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal; + var pos = 0.0, val = 0.0; + for (var j = 0; j < divTotal; j++) { + var htmlline = ""; + var mode = list[i].id[5]; + if(mode == "s") { + pos = 100 - (((j)*tS*100)/mTotal) - divEdge; + val = (j-divTotal+1)*tS; + if(j == divTotal - 1) + htmlline = '<div class="t" style="right:'+pos+'%"><cS>S→</cS></div>'; + else + htmlline = '<div class="t" style="right:'+pos+'%">'+val+'ms</div>'; + } else { + pos = 100 - (((j)*tS*100)/mTotal); + val = (j)*tS; + htmlline = '<div class="t" style="right:'+pos+'%">'+val+'ms</div>'; + if(j == 0) + if(mode == "r") + htmlline = rline+"<cS>←R</cS></div>"; + else + htmlline = rline+"<cS>0ms</div>"; + } + html += htmlline; + } + timescale.innerHTML = html; + } + } + function zoomTimeline() { + var dmesg = document.getElementById("dmesg"); + var zoombox = document.getElementById("dmesgzoombox"); + var left = zoombox.scrollLeft; + var val = parseFloat(dmesg.style.width); + var newval = 100; + var sh = window.outerWidth / 2; + if(this.id == "zoomin") { + newval = val * 1.2; + if(newval > 910034) newval = 910034; + dmesg.style.width = newval+"%"; + zoombox.scrollLeft = ((left + sh) * newval / val) - sh; + } else if (this.id == "zoomout") { + newval = val / 1.2; + if(newval < 100) newval = 100; + dmesg.style.width = newval+"%"; + zoombox.scrollLeft = ((left + sh) * newval / val) - sh; + } else { + zoombox.scrollLeft = 0; + dmesg.style.width = "100%"; + } + var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1]; + var t0 = bounds[0]; + var tMax = bounds[1]; + var tTotal = tMax - t0; + var wTotal = tTotal * 100.0 / newval; + var idx = 7*window.innerWidth/1100; + for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++); + if(i >= tS.length) i = tS.length - 1; + if(tS[i] == resolution) return; + resolution = tS[i]; + redrawTimescale(t0, tMax, tS[i]); + } + function deviceName(title) { + var name = title.slice(0, title.indexOf(" (")); + return name; + } + function deviceHover() { + var name = deviceName(this.title); + var dmesg = document.getElementById("dmesg"); + var dev = dmesg.getElementsByClassName("thread"); + var cpu = -1; + if(name.match("CPU_ON\[[0-9]*\]")) + cpu = parseInt(name.slice(7)); + else if(name.match("CPU_OFF\[[0-9]*\]")) + cpu = parseInt(name.slice(8)); + for (var i = 0; i < dev.length; i++) { + dname = deviceName(dev[i].title); + var cname = dev[i].className.slice(dev[i].className.indexOf("thread")); + if((cpu >= 0 && dname.match("CPU_O[NF]*\\[*"+cpu+"\\]")) || + (name == dname)) + { + dev[i].className = "hover "+cname; + } else { + dev[i].className = cname; + } + } + } + function deviceUnhover() { + var dmesg = document.getElementById("dmesg"); + var dev = dmesg.getElementsByClassName("thread"); + for (var i = 0; i < dev.length; i++) { + dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread")); + } + } + function deviceTitle(title, total, cpu) { + var prefix = "Total"; + if(total.length > 3) { + prefix = "Average"; + total[1] = (total[1]+total[3])/2; + total[2] = (total[2]+total[4])/2; + } + var devtitle = document.getElementById("devicedetailtitle"); + var name = deviceName(title); + if(cpu >= 0) name = "CPU"+cpu; + var driver = ""; + var tS = "<t2>(</t2>"; + var tR = "<t2>)</t2>"; + if(total[1] > 0) + tS = "<t2>("+prefix+" Suspend:</t2><t0> "+total[1].toFixed(3)+" ms</t0> "; + if(total[2] > 0) + tR = " <t2>"+prefix+" Resume:</t2><t0> "+total[2].toFixed(3)+" ms<t2>)</t2></t0>"; + var s = title.indexOf("{"); + var e = title.indexOf("}"); + if((s >= 0) && (e >= 0)) + driver = title.slice(s+1, e) + " <t1>@</t1> "; + if(total[1] > 0 && total[2] > 0) + devtitle.innerHTML = "<t0>"+driver+name+"</t0> "+tS+tR; + else + devtitle.innerHTML = "<t0>"+title+"</t0>"; + return name; + } + function deviceDetail() { + var devinfo = document.getElementById("devicedetail"); + devinfo.style.display = "block"; + var name = deviceName(this.title); + var cpu = -1; + if(name.match("CPU_ON\[[0-9]*\]")) + cpu = parseInt(name.slice(7)); + else if(name.match("CPU_OFF\[[0-9]*\]")) + cpu = parseInt(name.slice(8)); + var dmesg = document.getElementById("dmesg"); + var dev = dmesg.getElementsByClassName("thread"); + var idlist = []; + var pdata = [[]]; + if(document.getElementById("devicedetail1")) + pdata = [[], []]; + var pd = pdata[0]; + var total = [0.0, 0.0, 0.0]; + for (var i = 0; i < dev.length; i++) { + dname = deviceName(dev[i].title); + if((cpu >= 0 && dname.match("CPU_O[NF]*\\[*"+cpu+"\\]")) || + (name == dname)) + { + idlist[idlist.length] = dev[i].id; + var tidx = 1; + if(dev[i].id[0] == "a") { + pd = pdata[0]; + } else { + if(pdata.length == 1) pdata[1] = []; + if(total.length == 3) total[3]=total[4]=0.0; + pd = pdata[1]; + tidx = 3; + } + var info = dev[i].title.split(" "); + var pname = info[info.length-1]; + pd[pname] = parseFloat(info[info.length-3].slice(1)); + total[0] += pd[pname]; + if(pname.indexOf("suspend") >= 0) + total[tidx] += pd[pname]; + else + total[tidx+1] += pd[pname]; + } + } + var devname = deviceTitle(this.title, total, cpu); + var left = 0.0; + for (var t = 0; t < pdata.length; t++) { + pd = pdata[t]; + devinfo = document.getElementById("devicedetail"+t); + var phases = devinfo.getElementsByClassName("phaselet"); + for (var i = 0; i < phases.length; i++) { + if(phases[i].id in pd) { + var w = 100.0*pd[phases[i].id]/total[0]; + var fs = 32; + if(w < 8) fs = 4*w | 0; + var fs2 = fs*3/4; + phases[i].style.width = w+"%"; + phases[i].style.left = left+"%"; + phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms"; + left += w; + var time = "<t4 style=\"font-size:"+fs+"px\">"+pd[phases[i].id]+" ms<br></t4>"; + var pname = "<t3 style=\"font-size:"+fs2+"px\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>"; + phases[i].innerHTML = time+pname; + } else { + phases[i].style.width = "0%"; + phases[i].style.left = left+"%"; + } + } + } + if(typeof devstats !== 'undefined') + callDetail(this.id, this.title); + var cglist = document.getElementById("callgraphs"); + if(!cglist) return; + var cg = cglist.getElementsByClassName("atop"); + if(cg.length < 10) return; + for (var i = 0; i < cg.length; i++) { + cgid = cg[i].id.split("x")[0] + if(idlist.indexOf(cgid) >= 0) { + cg[i].style.display = "block"; + } else { + cg[i].style.display = "none"; + } + } + } + function callDetail(devid, devtitle) { + if(!(devid in devstats) || devstats[devid].length < 1) + return; + var list = devstats[devid]; + var tmp = devtitle.split(" "); + var name = tmp[0], phase = tmp[tmp.length-1]; + var dd = document.getElementById(phase); + var total = parseFloat(tmp[1].slice(1)); + var mlist = []; + var maxlen = 0; + var info = [] + for(var i in list) { + if(list[i][0] == "@") { + info = list[i].split("|"); + continue; + } + var tmp = list[i].split("|"); + var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]); + var p = (t*100.0/total).toFixed(2); + mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"]; + if(f.length > maxlen) + maxlen = f.length; + } + var pad = 5; + if(mlist.length == 0) pad = 30; + var html = '<div style="padding-top:'+pad+'px"><t3> <b>'+name+':</b>'; + if(info.length > 2) + html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>"; + if(info.length > 3) + html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>"; + if(info.length > 4) + html += ", return=<b>"+info[4]+"</b>"; + html += "</t3></div>"; + if(mlist.length > 0) { + html += '<table class=fstat style="padding-top:'+(maxlen*5)+'px;"><tr><th>Function</th>'; + for(var i in mlist) + html += "<td class=vt>"+mlist[i][0]+"</td>"; + html += "</tr><tr><th>Calls</th>"; + for(var i in mlist) + html += "<td>"+mlist[i][1]+"</td>"; + html += "</tr><tr><th>Time(ms)</th>"; + for(var i in mlist) + html += "<td>"+mlist[i][2]+"</td>"; + html += "</tr><tr><th>Percent</th>"; + for(var i in mlist) + html += "<td>"+mlist[i][3]+"</td>"; + html += "</tr></table>"; + } + dd.innerHTML = html; + var height = (maxlen*5)+100; + dd.style.height = height+"px"; + document.getElementById("devicedetail").style.height = height+"px"; + } + function callSelect() { + var cglist = document.getElementById("callgraphs"); + if(!cglist) return; + var cg = cglist.getElementsByClassName("atop"); + for (var i = 0; i < cg.length; i++) { + if(this.id == cg[i].id) { + cg[i].style.display = "block"; + } else { + cg[i].style.display = "none"; + } + } + } + function devListWindow(e) { + var win = window.open(); + var html = "<title>"+e.target.innerHTML+"</title>"+ + "<style type=\"text/css\">"+ + " ul {list-style-type:circle;padding-left:10px;margin-left:10px;}"+ + "</style>" + var dt = devtable[0]; + if(e.target.id != "devlist1") + dt = devtable[1]; + win.document.write(html+dt); + } + function errWindow() { + var range = this.id.split("_"); + var idx1 = parseInt(range[0]); + var idx2 = parseInt(range[1]); + var win = window.open(); + var log = document.getElementById("dmesglog"); + var title = "<title>dmesg log</title>"; + var text = log.innerHTML.split("\n"); + var html = ""; + for(var i = 0; i < text.length; i++) { + if(i == idx1) { + html += "<e id=target>"+text[i]+"</e>\n"; + } else if(i > idx1 && i <= idx2) { + html += "<e>"+text[i]+"</e>\n"; + } else { + html += text[i]+"\n"; + } + } + win.document.write("<style>e{color:red}</style>"+title+"<pre>"+html+"</pre>"); + win.location.hash = "#target"; + win.document.close(); + } + function logWindow(e) { + var name = e.target.id.slice(4); + var win = window.open(); + var log = document.getElementById(name+"log"); + var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>"; + win.document.write(title+"<pre>"+log.innerHTML+"</pre>"); + win.document.close(); + } + function onMouseDown(e) { + dragval[0] = e.clientX; + dragval[1] = document.getElementById("dmesgzoombox").scrollLeft; + document.onmousemove = onMouseMove; + } + function onMouseMove(e) { + var zoombox = document.getElementById("dmesgzoombox"); + zoombox.scrollLeft = dragval[1] + dragval[0] - e.clientX; + } + function onMouseUp(e) { + document.onmousemove = null; + } + function onKeyPress(e) { + var c = e.charCode; + if(c != 42 && c != 43 && c != 45) return; + var click = document.createEvent("Events"); + click.initEvent("click", true, false); + if(c == 43) + document.getElementById("zoomin").dispatchEvent(click); + else if(c == 45) + document.getElementById("zoomout").dispatchEvent(click); + else if(c == 42) + document.getElementById("zoomdef").dispatchEvent(click); + } + window.addEventListener("resize", function () {zoomTimeline();}); + window.addEventListener("load", function () { + var dmesg = document.getElementById("dmesg"); + dmesg.style.width = "100%" + dmesg.onmousedown = onMouseDown; + document.onmouseup = onMouseUp; + document.onkeypress = onKeyPress; + document.getElementById("zoomin").onclick = zoomTimeline; + document.getElementById("zoomout").onclick = zoomTimeline; + document.getElementById("zoomdef").onclick = zoomTimeline; + var list = document.getElementsByClassName("err"); + for (var i = 0; i < list.length; i++) + list[i].onclick = errWindow; + var list = document.getElementsByClassName("logbtn"); + for (var i = 0; i < list.length; i++) + list[i].onclick = logWindow; + list = document.getElementsByClassName("devlist"); + for (var i = 0; i < list.length; i++) + list[i].onclick = devListWindow; + var dev = dmesg.getElementsByClassName("thread"); + for (var i = 0; i < dev.length; i++) { + dev[i].onclick = deviceDetail; + dev[i].onmouseover = deviceHover; + dev[i].onmouseout = deviceUnhover; + } + var dev = dmesg.getElementsByClassName("srccall"); + for (var i = 0; i < dev.length; i++) + dev[i].onclick = callSelect; + zoomTimeline(); + }); +</script> """ hf.write(script_code); # Function: executeSuspend @@ -5524,7 +5525,9 @@ def executeSuspend(quiet=False): if ((mode == 'freeze') or (sv.memmode == 's2idle')) \ and sv.haveTurbostat(): # execution will pause here - turbo = sv.turbostat(s0ixready) + retval, turbo = sv.turbostat(s0ixready) + if retval != 0: + tdata['error'] ='turbostat returned %d' % retval if turbo: tdata['turbo'] = turbo else: @@ -5532,6 +5535,7 @@ def executeSuspend(quiet=False): pf.write(mode) # execution will pause here try: + pf.flush() pf.close() except Exception as e: tdata['error'] = str(e) @@ -5633,7 +5637,7 @@ def deviceInfo(output=''): tgtval = 'runtime_status' lines = dict() for dirname, dirnames, filenames in os.walk('/sys/devices'): - if(not re.match('.*/power', dirname) or + if(not re.match(r'.*/power', dirname) or 'control' not in filenames or tgtval not in filenames): continue @@ -5702,6 +5706,40 @@ def getModes(): fp.close() return modes +def dmidecode_backup(out, fatal=False): + cpath, spath, info = '/proc/cpuinfo', '/sys/class/dmi/id', { + 'bios-vendor': 'bios_vendor', + 'bios-version': 'bios_version', + 'bios-release-date': 'bios_date', + 'system-manufacturer': 'sys_vendor', + 'system-product-name': 'product_name', + 'system-version': 'product_version', + 'system-serial-number': 'product_serial', + 'baseboard-manufacturer': 'board_vendor', + 'baseboard-product-name': 'board_name', + 'baseboard-version': 'board_version', + 'baseboard-serial-number': 'board_serial', + 'chassis-manufacturer': 'chassis_vendor', + 'chassis-version': 'chassis_version', + 'chassis-serial-number': 'chassis_serial', + } + for key in info: + if key not in out: + val = sysvals.getVal(os.path.join(spath, info[key])).strip() + if val and val.lower() != 'to be filled by o.e.m.': + out[key] = val + if 'processor-version' not in out and os.path.exists(cpath): + with open(cpath, 'r') as fp: + for line in fp: + m = re.match(r'^model\s*name\s*\:\s*(?P<c>.*)', line) + if m: + out['processor-version'] = m.group('c').strip() + break + if fatal and len(out) < 1: + doError('dmidecode failed to get info from %s or %s' % \ + (sysvals.mempath, spath)) + return out + # Function: dmidecode # Description: # Read the bios tables and pull out system info @@ -5712,6 +5750,8 @@ def getModes(): # A dict object with all available key/values def dmidecode(mempath, fatal=False): out = dict() + if(not (os.path.exists(mempath) and os.access(mempath, os.R_OK))): + return dmidecode_backup(out, fatal) # the list of values to retrieve, with hardcoded (type, idx) info = { @@ -5727,24 +5767,14 @@ def dmidecode(mempath, fatal=False): 'baseboard-version': (2, 6), 'baseboard-serial-number': (2, 7), 'chassis-manufacturer': (3, 4), - 'chassis-type': (3, 5), 'chassis-version': (3, 6), 'chassis-serial-number': (3, 7), 'processor-manufacturer': (4, 7), 'processor-version': (4, 16), } - if(not os.path.exists(mempath)): - if(fatal): - doError('file does not exist: %s' % mempath) - return out - if(not os.access(mempath, os.R_OK)): - if(fatal): - doError('file is not readable: %s' % mempath) - return out # by default use legacy scan, but try to use EFI first - memaddr = 0xf0000 - memsize = 0x10000 + memaddr, memsize = 0xf0000, 0x10000 for ep in ['/sys/firmware/efi/systab', '/proc/efi/systab']: if not os.path.exists(ep) or not os.access(ep, os.R_OK): continue @@ -5765,11 +5795,7 @@ def dmidecode(mempath, fatal=False): fp.seek(memaddr) buf = fp.read(memsize) except: - if(fatal): - doError('DMI table is unreachable, sorry') - else: - pprint('WARNING: /dev/mem is not readable, ignoring DMI data') - return out + return dmidecode_backup(out, fatal) fp.close() # search for either an SM table or DMI table @@ -5785,10 +5811,7 @@ def dmidecode(mempath, fatal=False): break i += 16 if base == 0 and length == 0 and num == 0: - if(fatal): - doError('Neither SMBIOS nor DMI were found') - else: - return out + return dmidecode_backup(out, fatal) # read in the SM or DMI table try: @@ -5796,11 +5819,7 @@ def dmidecode(mempath, fatal=False): fp.seek(base) buf = fp.read(length) except: - if(fatal): - doError('DMI table is unreachable, sorry') - else: - pprint('WARNING: /dev/mem is not readable, ignoring DMI data') - return out + return dmidecode_backup(out, fatal) fp.close() # scan the table for the values we want @@ -6272,7 +6291,10 @@ def find_in_html(html, start, end, firstonly=True): return out def data_from_html(file, outpath, issues, fulldetail=False): - html = open(file, 'r').read() + try: + html = open(file, 'r').read() + except: + html = ascii(open(file, 'rb').read()) sysvals.htmlfile = os.path.relpath(file, outpath) # extract general info suspend = find_in_html(html, 'Kernel Suspend', 'ms') @@ -6290,7 +6312,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): tstr = dt.strftime('%Y/%m/%d %H:%M:%S') error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>') if error: - m = re.match('[a-z0-9]* failed in (?P<p>\S*).*', error) + m = re.match(r'[a-z0-9]* failed in (?P<p>\S*).*', error) if m: result = 'fail in %s' % m.group('p') else: @@ -6307,8 +6329,9 @@ def data_from_html(file, outpath, issues, fulldetail=False): d.end = 999999999 d.dmesgtext = log.split('\n') tp = d.extractErrorInfo() - for msg in tp.msglist: - sysvals.errorSummary(issues, msg) + if len(issues) < 100: + for msg in tp.msglist: + sysvals.errorSummary(issues, msg) if stmp[2] == 'freeze': extra = d.turbostatInfo() elist = dict() @@ -6325,6 +6348,11 @@ def data_from_html(file, outpath, issues, fulldetail=False): line = find_in_html(log, '# netfix ', '\n') if line: extra['netfix'] = line + line = find_in_html(log, '# command ', '\n') + if line: + m = re.match(r'.* -m (?P<m>\S*).*', line) + if m: + extra['fullmode'] = m.group('m') low = find_in_html(html, 'freeze time: <b>', ' ms</b>') for lowstr in ['waking', '+']: if not low: @@ -6334,7 +6362,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): if lowstr == '+': issue = 'S2LOOPx%d' % len(low.split('+')) else: - m = re.match('.*waking *(?P<n>[0-9]*) *times.*', low) + m = re.match(r'.*waking *(?P<n>[0-9]*) *times.*', low) issue = 'S2WAKEx%s' % m.group('n') if m else 'S2WAKExNaN' match = [i for i in issues if i['match'] == issue] if len(match) > 0: @@ -6352,10 +6380,10 @@ def data_from_html(file, outpath, issues, fulldetail=False): # extract device info devices = dict() for line in html.split('\n'): - m = re.match(' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line) + m = re.match(r' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line) if not m or 'thread kth' in line or 'thread sec' in line: continue - m = re.match('(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title')) + m = re.match(r'(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title')) if not m: continue name, time, phase = m.group('n'), m.group('t'), m.group('p') @@ -6416,9 +6444,9 @@ def genHtml(subdir, force=False): for filename in filenames: file = os.path.join(dirname, filename) if sysvals.usable(file): - if(re.match('.*_dmesg.txt', filename)): + if(re.match(r'.*_dmesg.txt', filename)): sysvals.dmesgfile = file - elif(re.match('.*_ftrace.txt', filename)): + elif(re.match(r'.*_ftrace.txt', filename)): sysvals.ftracefile = file sysvals.setOutputFile() if (sysvals.dmesgfile or sysvals.ftracefile) and sysvals.htmlfile and \ @@ -6441,7 +6469,7 @@ def runSummary(subdir, local=True, genhtml=False): desc = {'host':[],'mode':[],'kernel':[]} for dirname, dirnames, filenames in os.walk(subdir): for filename in filenames: - if(not re.match('.*.html', filename)): + if(not re.match(r'.*.html', filename)): continue data = data_from_html(os.path.join(dirname, filename), outpath, issues) if(not data): diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5899c27c2e2e..5127be34869e 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.19"; +static const char *version_str = "v1.20"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 05efffbca3b7..e05561d00458 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -283,6 +283,8 @@ int isst_set_trl(struct isst_id *id, unsigned long long trl) return 0; } +#define MSR_TRL_FREQ_MULTIPLIER 100 + int isst_set_trl_from_current_tdp(struct isst_id *id, unsigned long long trl) { unsigned long long msr_trl; @@ -310,6 +312,10 @@ int isst_set_trl_from_current_tdp(struct isst_id *id, unsigned long long trl) for (i = 0; i < 8; ++i) { unsigned long long _trl = trl[i]; + /* MSR is always in 100 MHz unit */ + if (isst_get_disp_freq_multiplier() == 1) + _trl /= MSR_TRL_FREQ_MULTIPLIER; + msr_trl |= (_trl << (i * 8)); } } diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2d6dce2c8f77..3946d5254a1f 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -14,6 +14,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -Wextra -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -DBUILD_BUG_HEADER='"../../../../include/linux/build_bug.h"' override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 @@ -44,10 +45,14 @@ snapshot: turbostat @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h + @echo PWD=. > $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile @echo "CFLAGS += -DINTEL_FAMILY_HEADER='\"intel-family.h\"'" >> $(SNAPSHOT)/Makefile - @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile + @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' -e's/.*BUILD_BUG_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile @rm -f $(SNAPSHOT).tar.gz tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 8d37acd39201..067717bce1d4 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -28,10 +28,13 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP | \fBperf/<device>/<event>\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute + <device> is a perf device from /sys/bus/event_source/devices/<device> eg. cstate_core + <event> is a perf event for given device from /sys/bus/event_source/devices/<device>/events/<event> eg. c1-residency + perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. @@ -52,6 +55,39 @@ name as necessary to disambiguate it from others is necessary. Note that option as the column header. .fi .PP +\fB--add pmt,[attr_name=attr_value, ...]\fP add column with a PMT (Intel Platform Monitoring Technology) counter in a similar way to --add option above, but require PMT metadata to be supplied to correctly read and display the counter. The metadata can be found in the Intel PMT XML files, hosted at https://github.com/intel/Intel-PMT. For a complete example see "ADD PMT COUNTER EXAMPLE". +.nf + name="name_string" + For column header. + + type={\fBraw\fP} + 'raw' shows the counter contents in hex. + default: raw + + format={\fBraw\fP | \fBdelta\fP} + 'raw' shows the counter contents in hex. + 'delta' shows the difference in values during the measurement interval. + default: raw + + domain={\fBcpu%u\fP | \fBcore%u\fP | \fBpackage%u\fP} + 'cpu' per cpu/thread counter. + 'core' per core counter. + 'package' per package counter. + '%u' denotes id of the domain that the counter is associated with. For example core4 would mean that the counter is associated with core number 4. + + offset=\fB%u\fP + '%u' offset within the PMT MMIO region. + + lsb=\fB%u\fP + '%u' least significant bit within the 64 bit value read from 'offset'. Together with 'msb', used to form a read mask. + + msb=\fB%u\fP + '%u' most significant bit within the 64 bit value read from 'offset'. Together with 'lsb', used to form a read mask. + + guid=\fB%x\fP + '%x' hex identifier of the PMT MMIO region. +.fi +.PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. @@ -67,10 +103,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--quiet\fP Do not decode and print the system configuration header information. .PP -+\fB--no-msr\fP Disable all the uses of the MSR driver. -+.PP -+\fB--no-perf\fP Disable all the uses of the perf API. -+.PP +\fB--no-msr\fP Disable all the uses of the MSR driver. +.PP +\fB--no-perf\fP Disable all the uses of the perf API. +.PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP \fB--num_iterations num\fP number of the measurement iterations. @@ -320,7 +356,7 @@ available on all processors. Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), labeling it with the column header, "PRF_CTRL", and display it only once, -afte the conclusion of a 0.1 second sleep. +after the conclusion of a 0.1 second sleep. .nf sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 0.101604 sec @@ -333,6 +369,56 @@ CPU PRF_CTRL .fi +.SH ADD PERF COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing time spent in C1 core cstate, +labeling it with the column header, "pCPU%c1", and display it only once, +after the conclusion of 0.1 second sleep. +We also show CPU%c1 built-in counter that should show similar values. +.nf +sudo ./turbostat --quiet --cpu 0-3 --show CPU,CPU%c1 --add perf/cstate_core/c1-residency,cpu,delta,percent,pCPU%c1 sleep .1 +0.102448 sec +CPU pCPU%c1 CPU%c1 +- 34.89 34.89 +0 45.99 45.99 +1 45.94 45.94 +2 23.83 23.83 +3 23.84 23.84 + +.fi + +.SH ADD PMT COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number 0. +We add two counters, showing crystal clock count and the DC6 residency. +All the parameters passed are based on the metadata found in the PMT XML files. + +For the crystal clock count, we +label it with the column header, "XTAL", +we set the type to 'raw', to read the number of clock ticks in hex, +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +For the DC6 residency counter, we +label it with the column header, "Die%c6", +we set the type to 'txtal_time', to obtain the percent residency value +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +.nf +sudo ./turbostat --quiet --cpu 0 --show CPU --add pmt,name=XTAL,type=raw,format=delta,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102 --add pmt,name=Die%c6,type=txtal_time,format=delta,domain=package0,offset=120,lsb=0,msb=63,guid=0x1a067102 +0.104352 sec +CPU XTAL Die%c6 +- 0x0000006d4d957ca7 0.00 +0 0x0000006d4d957ca7 0.00 +0.102448 sec +.fi + .SH INPUT For interval-mode, turbostat will immediately end the current interval diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8cdf41906e98..089220aaa5c9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9,7 +9,32 @@ #define _GNU_SOURCE #include MSRHEADER + +// copied from arch/x86/include/asm/cpu_device_id.h +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) +// end copied section + +#define X86_VENDOR_INTEL 0 + #include INTEL_FAMILY_HEADER +#include BUILD_BUG_HEADER #include <stdarg.h> #include <stdio.h> #include <err.h> @@ -19,6 +44,7 @@ #include <sys/stat.h> #include <sys/select.h> #include <sys/resource.h> +#include <sys/mman.h> #include <fcntl.h> #include <signal.h> #include <sys/time.h> @@ -38,7 +64,6 @@ #include <stdbool.h> #include <assert.h> #include <linux/kernel.h> -#include <linux/build_bug.h> #define UNUSED(x) (void)(x) @@ -55,15 +80,39 @@ */ #define NAME_BYTES 20 #define PATH_BYTES 128 +#define PERF_NAME_BYTES 128 #define MAX_NOFILE 0x8000 +#define COUNTER_KIND_PERF_PREFIX "perf/" +#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) +#define PERF_DEV_NAME_BYTES 32 +#define PERF_EVT_NAME_BYTES 32 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; -enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; -enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; -enum cstate_source { CSTATE_SOURCE_NONE, CSTATE_SOURCE_PERF, CSTATE_SOURCE_MSR }; +enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; + +struct perf_counter_info { + struct perf_counter_info *next; + + /* How to open the counter / What counter it is. */ + char device[PERF_DEV_NAME_BYTES]; + char event[PERF_EVT_NAME_BYTES]; + + /* How to show/format the counter. */ + char name[PERF_NAME_BYTES]; + unsigned int width; + enum counter_scope scope; + enum counter_type type; + enum counter_format format; + double scale; + + /* For reading the counter. */ + int *fd_perf_per_domain; + size_t num_domains; +}; struct sysfs_path { char path[PATH_BYTES]; @@ -144,6 +193,7 @@ struct msr_counter bic[] = { { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -205,11 +255,12 @@ struct msr_counter bic[] = { #define BIC_SAM_mc6 (1ULL << 55) #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) +#define BIC_Diec6 (1ULL << 58) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) +#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -252,7 +303,6 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; -struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */ struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -267,6 +317,7 @@ unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int has_aperf; +unsigned int has_aperf_access; unsigned int has_epb; unsigned int has_turbo; unsigned int is_hybrid; @@ -307,7 +358,6 @@ unsigned int first_counter_read = 1; int ignore_stdin; bool no_msr; bool no_perf; -enum amperf_source amperf_source; enum gfx_sysfs_idx { GFX_rc6, @@ -367,7 +417,7 @@ struct platform_features { }; struct platform_data { - unsigned int model; + unsigned int vfm; const struct platform_features *features; }; @@ -910,75 +960,75 @@ static const struct platform_features amd_features_with_rapl = { }; static const struct platform_data turbostat_pdata[] = { - { INTEL_FAM6_NEHALEM, &nhm_features }, - { INTEL_FAM6_NEHALEM_G, &nhm_features }, - { INTEL_FAM6_NEHALEM_EP, &nhm_features }, - { INTEL_FAM6_NEHALEM_EX, &nhx_features }, - { INTEL_FAM6_WESTMERE, &nhm_features }, - { INTEL_FAM6_WESTMERE_EP, &nhm_features }, - { INTEL_FAM6_WESTMERE_EX, &nhx_features }, - { INTEL_FAM6_SANDYBRIDGE, &snb_features }, - { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, - { INTEL_FAM6_IVYBRIDGE, &ivb_features }, - { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, - { INTEL_FAM6_HASWELL, &hsw_features }, - { INTEL_FAM6_HASWELL_X, &hsx_features }, - { INTEL_FAM6_HASWELL_L, &hswl_features }, - { INTEL_FAM6_HASWELL_G, &hswg_features }, - { INTEL_FAM6_BROADWELL, &bdw_features }, - { INTEL_FAM6_BROADWELL_G, &bdwg_features }, - { INTEL_FAM6_BROADWELL_X, &bdx_features }, - { INTEL_FAM6_BROADWELL_D, &bdx_features }, - { INTEL_FAM6_SKYLAKE_L, &skl_features }, - { INTEL_FAM6_SKYLAKE, &skl_features }, - { INTEL_FAM6_SKYLAKE_X, &skx_features }, - { INTEL_FAM6_KABYLAKE_L, &skl_features }, - { INTEL_FAM6_KABYLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE_L, &skl_features }, - { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_X, &icx_features }, - { INTEL_FAM6_ICELAKE_D, &icx_features }, - { INTEL_FAM6_ICELAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, - { INTEL_FAM6_ROCKETLAKE, &cnl_features }, - { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, - { INTEL_FAM6_TIGERLAKE, &cnl_features }, - { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, - { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, - { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, - { INTEL_FAM6_LAKEFIELD, &cnl_features }, - { INTEL_FAM6_ALDERLAKE, &adl_features }, - { INTEL_FAM6_ALDERLAKE_L, &adl_features }, - { INTEL_FAM6_RAPTORLAKE, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_P, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, - { INTEL_FAM6_METEORLAKE, &cnl_features }, - { INTEL_FAM6_METEORLAKE_L, &cnl_features }, - { INTEL_FAM6_ARROWLAKE_H, &arl_features }, - { INTEL_FAM6_ARROWLAKE_U, &arl_features }, - { INTEL_FAM6_ARROWLAKE, &arl_features }, - { INTEL_FAM6_LUNARLAKE_M, &arl_features }, - { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, - { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, - { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, - { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, - { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, - { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, - { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, - { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, - { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, - { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, - { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, - { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, - { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, - { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, + { INTEL_NEHALEM, &nhm_features }, + { INTEL_NEHALEM_G, &nhm_features }, + { INTEL_NEHALEM_EP, &nhm_features }, + { INTEL_NEHALEM_EX, &nhx_features }, + { INTEL_WESTMERE, &nhm_features }, + { INTEL_WESTMERE_EP, &nhm_features }, + { INTEL_WESTMERE_EX, &nhx_features }, + { INTEL_SANDYBRIDGE, &snb_features }, + { INTEL_SANDYBRIDGE_X, &snx_features }, + { INTEL_IVYBRIDGE, &ivb_features }, + { INTEL_IVYBRIDGE_X, &ivx_features }, + { INTEL_HASWELL, &hsw_features }, + { INTEL_HASWELL_X, &hsx_features }, + { INTEL_HASWELL_L, &hswl_features }, + { INTEL_HASWELL_G, &hswg_features }, + { INTEL_BROADWELL, &bdw_features }, + { INTEL_BROADWELL_G, &bdwg_features }, + { INTEL_BROADWELL_X, &bdx_features }, + { INTEL_BROADWELL_D, &bdx_features }, + { INTEL_SKYLAKE_L, &skl_features }, + { INTEL_SKYLAKE, &skl_features }, + { INTEL_SKYLAKE_X, &skx_features }, + { INTEL_KABYLAKE_L, &skl_features }, + { INTEL_KABYLAKE, &skl_features }, + { INTEL_COMETLAKE, &skl_features }, + { INTEL_COMETLAKE_L, &skl_features }, + { INTEL_CANNONLAKE_L, &cnl_features }, + { INTEL_ICELAKE_X, &icx_features }, + { INTEL_ICELAKE_D, &icx_features }, + { INTEL_ICELAKE_L, &cnl_features }, + { INTEL_ICELAKE_NNPI, &cnl_features }, + { INTEL_ROCKETLAKE, &cnl_features }, + { INTEL_TIGERLAKE_L, &cnl_features }, + { INTEL_TIGERLAKE, &cnl_features }, + { INTEL_SAPPHIRERAPIDS_X, &spr_features }, + { INTEL_EMERALDRAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_LAKEFIELD, &cnl_features }, + { INTEL_ALDERLAKE, &adl_features }, + { INTEL_ALDERLAKE_L, &adl_features }, + { INTEL_RAPTORLAKE, &adl_features }, + { INTEL_RAPTORLAKE_P, &adl_features }, + { INTEL_RAPTORLAKE_S, &adl_features }, + { INTEL_METEORLAKE, &cnl_features }, + { INTEL_METEORLAKE_L, &cnl_features }, + { INTEL_ARROWLAKE_H, &arl_features }, + { INTEL_ARROWLAKE_U, &arl_features }, + { INTEL_ARROWLAKE, &arl_features }, + { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_ATOM_SILVERMONT, &slv_features }, + { INTEL_ATOM_SILVERMONT_D, &slvd_features }, + { INTEL_ATOM_AIRMONT, &amt_features }, + { INTEL_ATOM_GOLDMONT, &gmt_features }, + { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, + { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, + { INTEL_ATOM_TREMONT_D, &tmtd_features }, + { INTEL_ATOM_TREMONT, &tmt_features }, + { INTEL_ATOM_TREMONT_L, &tmt_features }, + { INTEL_ATOM_GRACEMONT, &adl_features }, + { INTEL_ATOM_CRESTMONT_X, &srf_features }, + { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_XEON_PHI_KNL, &knl_features }, + { INTEL_XEON_PHI_KNM, &knl_features }, /* * Missing support for - * INTEL_FAM6_ICELAKE - * INTEL_FAM6_ATOM_SILVERMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_NP + * INTEL_ICELAKE + * INTEL_ATOM_SILVERMONT_MID + * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, }; @@ -1003,11 +1053,11 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } - if (!genuine_intel || family != 6) + if (!genuine_intel) return; for (i = 0; turbostat_pdata[i].features; i++) { - if (turbostat_pdata[i].model == model) { + if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { platform = turbostat_pdata[i].features; return; } @@ -1034,8 +1084,13 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 +#define PMT_MAX_ADDED_THREAD_COUNTERS 24 +#define PMT_MAX_ADDED_CORE_COUNTERS 8 +#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 #define BITMASK_SIZE 32 +#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) + /* Indexes used to map data read from perf and MSRs into global variables */ enum rapl_rci_index { RAPL_RCI_INDEX_ENERGY_PKG = 0, @@ -1056,19 +1111,13 @@ enum rapl_unit { struct rapl_counter_info_t { unsigned long long data[NUM_RAPL_COUNTERS]; - enum rapl_source source[NUM_RAPL_COUNTERS]; + enum counter_source source[NUM_RAPL_COUNTERS]; unsigned long long flags[NUM_RAPL_COUNTERS]; double scale[NUM_RAPL_COUNTERS]; enum rapl_unit unit[NUM_RAPL_COUNTERS]; - - union { - /* Active when source == RAPL_SOURCE_MSR */ - struct { - unsigned long long msr[NUM_RAPL_COUNTERS]; - unsigned long long msr_mask[NUM_RAPL_COUNTERS]; - int msr_shift[NUM_RAPL_COUNTERS]; - }; - }; + unsigned long long msr[NUM_RAPL_COUNTERS]; + unsigned long long msr_mask[NUM_RAPL_COUNTERS]; + int msr_shift[NUM_RAPL_COUNTERS]; int fd_perf; }; @@ -1224,7 +1273,7 @@ enum ccstate_rci_index { struct cstate_counter_info_t { unsigned long long data[NUM_CSTATE_COUNTERS]; - enum cstate_source source[NUM_CSTATE_COUNTERS]; + enum counter_source source[NUM_CSTATE_COUNTERS]; unsigned long long msr[NUM_CSTATE_COUNTERS]; int fd_perf_core; int fd_perf_pkg; @@ -1361,6 +1410,167 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { }, }; +/* Indexes used to map data read from perf and MSRs into global variables */ +enum msr_rci_index { + MSR_RCI_INDEX_APERF = 0, + MSR_RCI_INDEX_MPERF = 1, + MSR_RCI_INDEX_SMI = 2, + NUM_MSR_COUNTERS, +}; + +struct msr_counter_info_t { + unsigned long long data[NUM_MSR_COUNTERS]; + enum counter_source source[NUM_MSR_COUNTERS]; + unsigned long long msr[NUM_MSR_COUNTERS]; + unsigned long long msr_mask[NUM_MSR_COUNTERS]; + int fd_perf; +}; + +struct msr_counter_info_t *msr_counter_info; +unsigned int msr_counter_info_size; + +struct msr_counter_arch_info { + const char *perf_subsys; + const char *perf_name; + unsigned long long msr; + unsigned long long msr_mask; + unsigned int rci_index; /* Maps data from perf counters to global variables */ + bool needed; + bool present; +}; + +enum msr_arch_info_index { + MSR_ARCH_INFO_APERF_INDEX = 0, + MSR_ARCH_INFO_MPERF_INDEX = 1, + MSR_ARCH_INFO_SMI_INDEX = 2, +}; + +static struct msr_counter_arch_info msr_counter_arch_infos[] = { + [MSR_ARCH_INFO_APERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "aperf", + .msr = MSR_IA32_APERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_APERF, + }, + + [MSR_ARCH_INFO_MPERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "mperf", + .msr = MSR_IA32_MPERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_MPERF, + }, + + [MSR_ARCH_INFO_SMI_INDEX] = { + .perf_subsys = "msr", + .perf_name = "smi", + .msr = MSR_SMI_COUNT, + .msr_mask = 0xFFFFFFFF, + .rci_index = MSR_RCI_INDEX_SMI, + }, +}; + +/* Can be redefined when compiling, useful for testing. */ +#ifndef SYSFS_TELEM_PATH +#define SYSFS_TELEM_PATH "/sys/class/intel_pmt" +#endif + +#define PMT_COUNTER_MTL_DC6_OFFSET 120 +#define PMT_COUNTER_MTL_DC6_LSB 0 +#define PMT_COUNTER_MTL_DC6_MSB 63 +#define PMT_MTL_DC6_GUID 0x1a067102 + +#define PMT_COUNTER_NAME_SIZE_BYTES 16 +#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 + +struct pmt_mmio { + struct pmt_mmio *next; + + unsigned int guid; + unsigned int size; + + /* Base pointer to the mmaped memory. */ + void *mmio_base; + + /* + * Offset to be applied to the mmio_base + * to get the beginning of the PMT counters for given GUID. + */ + unsigned long pmt_offset; +} *pmt_mmios; + +enum pmt_datatype { + PMT_TYPE_RAW, + PMT_TYPE_XTAL_TIME, +}; + +struct pmt_domain_info { + /* + * Pointer to the MMIO obtained by applying a counter offset + * to the mmio_base of the mmaped region for the given GUID. + * + * This is where to read the raw value of the counter from. + */ + unsigned long *pcounter; +}; + +struct pmt_counter { + struct pmt_counter *next; + + /* PMT metadata */ + char name[PMT_COUNTER_NAME_SIZE_BYTES]; + enum pmt_datatype type; + enum counter_scope scope; + unsigned int lsb; + unsigned int msb; + + /* BIC-like metadata */ + enum counter_format format; + + unsigned int num_domains; + struct pmt_domain_info *domains; +}; + +unsigned int pmt_counter_get_width(const struct pmt_counter *p) +{ + return (p->msb - p->lsb) + 1; +} + +void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) +{ + struct pmt_domain_info *new_mem; + + new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); + if (!new_mem) { + fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); + exit(1); + } + + /* Zero initialize just allocated memory. */ + const size_t num_new_domains = new_size - pcounter->num_domains; + + memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); + + pcounter->num_domains = new_size; + pcounter->domains = new_mem; +} + +void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) +{ + /* + * Allocate more memory ahead of time. + * + * Always allocate space for at least 8 elements + * and double the size when growing. + */ + if (new_size < 8) + new_size = 8; + new_size = MAX(new_size, pcounter->num_domains * 2); + + pmt_counter_resize_(pcounter, new_size); +} + struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -1378,6 +1588,8 @@ struct thread_data { unsigned int flags; bool is_atom; unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -1391,6 +1603,8 @@ struct core_data { unsigned int core_id; unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -1423,7 +1637,10 @@ struct pkg_data { struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned int uncore_mhz; + unsigned long long die_c6; unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -1558,12 +1775,25 @@ int idx_valid(int idx) } struct sys_counters { + /* MSR added counters */ unsigned int added_thread_counters; unsigned int added_core_counters; unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; + + /* perf added counters */ + unsigned int added_thread_perf_counters; + unsigned int added_core_perf_counters; + unsigned int added_package_perf_counters; + struct perf_counter_info *perf_tp; + struct perf_counter_info *perf_cp; + struct perf_counter_info *perf_pp; + + struct pmt_counter *pmt_tp; + struct pmt_counter *pmt_cp; + struct pmt_counter *pmt_pp; } sys; static size_t free_msr_counters_(struct msr_counter **pp) @@ -1747,7 +1977,7 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_SMI | BIC_Mod_c6 | BIC_CoreTmp | + const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; bic_enabled &= ~bic_msrs; @@ -1823,6 +2053,23 @@ int probe_msr(int cpu, off_t offset) return 0; } +/* Convert CPU ID to domain ID for given added perf counter. */ +unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) +{ + switch (pc->scope) { + case SCOPE_CPU: + return cpu; + + case SCOPE_CORE: + return cpus[cpu].physical_core_id; + + case SCOPE_PACKAGE: + return cpus[cpu].physical_package_id; + } + + __builtin_unreachable(); +} + #define MAX_DEFERRED 16 char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; @@ -1846,9 +2093,12 @@ void help(void) "to print statistics, until interrupted.\n" " -a, --add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" + " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " debug messages are printed to stderr\n" " -D, --Dump displays the raw counter values\n" " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" @@ -1955,6 +2205,8 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) void print_header(char *delim) { struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; int printed = 0; if (DO_BIC(BIC_USEC)) @@ -2012,6 +2264,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_tp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_tp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3)) @@ -2052,6 +2338,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_cp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_cp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2096,6 +2416,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_Diec6)) + outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_LPI)) outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); if (DO_BIC(BIC_SYS_LPI)) @@ -2147,6 +2469,40 @@ void print_header(char *delim) } } + for (pp = sys.perf_pp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + + ppmt = sys.pmt_pp; + while (ppmt) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; + } + + ppmt = ppmt->next; + } + outp += sprintf(outp, "\n"); } @@ -2267,6 +2623,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *fmt8; int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; char *delim = "\t"; int printed = 0; @@ -2404,6 +2762,51 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + /* Added perf counters */ + for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + if (pp->type == COUNTER_USEC) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + t->perf_counter[i] / interval_float / 10000); + else + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + } + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = t->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + /* C1 */ if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); @@ -2447,6 +2850,44 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); + } + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = c->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) @@ -2526,6 +2967,10 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + if (DO_BIC(BIC_Diec6)) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) outp += @@ -2601,6 +3046,47 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); + } else if (pp->type == COUNTER_K2M) { + outp += + sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + } + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); + + break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = p->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; + } + } + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -2654,6 +3140,8 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -2674,6 +3162,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; + old->die_c6 = new->die_c6 - old->die_c6; old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; old->sys_lpi = new->sys_lpi - old->sys_lpi; old->pkg_temp_c = new->pkg_temp_c; @@ -2714,6 +3203,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->counter[i] = new->counter[i] - old->counter[i]; } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else if (pp->format == FORMAT_AVERAGE) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2721,6 +3226,8 @@ void delta_core(struct core_data *new, struct core_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; @@ -2737,6 +3244,20 @@ void delta_core(struct core_data *new, struct core_data *old) else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } } int soft_c1_residency_display(int bic) @@ -2754,6 +3275,8 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* we run cpuid just the 1st time, copy the results */ if (DO_BIC(BIC_APIC)) @@ -2832,6 +3355,21 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2908,6 +3446,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; + p->die_c6 = 0; p->cpu_lpi = 0; p->sys_lpi = 0; @@ -2934,6 +3473,14 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) p->counter[i] = 0; + + memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); + memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); + memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); + + memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); + memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); + memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); } void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) @@ -2954,6 +3501,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* copy un-changing apic_id's */ if (DO_BIC(BIC_APIC)) @@ -2984,6 +3533,16 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.counter[i] += t->counter[i]; } + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.threads.perf_counter[i] += t->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] += t->pmt_counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -3004,6 +3563,16 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.counter[i] += c->counter[i]; } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.cores.perf_counter[i] += c->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] += c->pmt_counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!is_cpu_first_core_in_package(t, c, p)) return 0; @@ -3027,6 +3596,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; + average.packages.die_c6 += p->die_c6; average.packages.cpu_lpi = p->cpu_lpi; average.packages.sys_lpi = p->sys_lpi; @@ -3055,6 +3625,18 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) else average.packages.counter[i] += p->counter[i]; } + + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.perf_counter[i] = p->perf_counter[i]; + else + average.packages.perf_counter[i] += p->perf_counter[i]; + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] += p->pmt_counter[i]; + } + return 0; } @@ -3066,6 +3648,8 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data { int i; struct msr_counter *mp; + struct perf_counter_info *pp; + struct pmt_counter *ppmt; clear_counters(&average.threads, &average.cores, &average.packages); @@ -3108,6 +3692,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data average.packages.pc8 /= topo.allowed_packages; average.packages.pc9 /= topo.allowed_packages; average.packages.pc10 /= topo.allowed_packages; + average.packages.die_c6 /= topo.allowed_packages; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -3137,6 +3722,45 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data } average.packages.counter[i] /= topo.allowed_packages; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.threads.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + continue; + } + average.threads.perf_counter[i] /= topo.allowed_cpus; + } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.cores.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.cores.perf_counter[i] /= topo.allowed_cores; + } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.packages.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.packages.perf_counter[i] /= topo.allowed_packages; + } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] /= topo.allowed_cpus; + } + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] /= topo.allowed_cores; + } + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] /= topo.allowed_packages; + } } static unsigned long long rdtsc(void) @@ -3382,30 +4006,6 @@ static unsigned int read_perf_counter_info_n(const char *const path, const char return v; } -static unsigned int read_msr_type(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/type"; - const char *const format = "%u"; - - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_aperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; - const char *const format = "event=%x"; - - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_mperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; - const char *const format = "event=%x"; - - return read_perf_counter_info_n(path, format); -} - static unsigned int read_perf_type(const char *subsys) { const char *const path_format = "/sys/bus/event_source/devices/%s/type"; @@ -3417,15 +4017,55 @@ static unsigned int read_perf_type(const char *subsys) return read_perf_counter_info_n(path, format); } -static unsigned int read_rapl_config(const char *subsys, const char *event_name) +static unsigned int read_perf_config(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; - const char *const format = "event=%x"; + FILE *fconfig = NULL; char path[128]; + char config_str[64]; + unsigned int config; + unsigned int umask; + bool has_config = false; + bool has_umask = false; + unsigned int ret = -1; snprintf(path, sizeof(path), path_format, subsys, event_name); - return read_perf_counter_info_n(path, format); + fconfig = fopen(path, "r"); + if (!fconfig) + return -1; + + if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) + goto cleanup_and_exit; + + for (char *pconfig_str = &config_str[0]; pconfig_str;) { + if (sscanf(pconfig_str, "event=%x", &config) == 1) { + has_config = true; + goto next; + } + + if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { + has_umask = true; + goto next; + } + +next: + pconfig_str = strchr(pconfig_str, ','); + if (pconfig_str) { + *pconfig_str = '\0'; + ++pconfig_str; + } + } + + if (!has_umask) + umask = 0; + + if (has_config) + ret = (umask << 8) | config; + +cleanup_and_exit: + fclose(fconfig); + return ret; } static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) @@ -3444,7 +4084,7 @@ static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_na return RAPL_UNIT_INVALID; } -static double read_perf_rapl_scale(const char *subsys, const char *event_name) +static double read_perf_scale(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; const char *const format = "%lf"; @@ -3459,130 +4099,12 @@ static double read_perf_rapl_scale(const char *subsys, const char *event_name) return scale; } -static struct amperf_group_fd open_amperf_fd(int cpu) -{ - const unsigned int msr_type = read_msr_type(); - const unsigned int aperf_config = read_aperf_config(); - const unsigned int mperf_config = read_mperf_config(); - struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; - - fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); - fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); - - return fds; -} - -static int get_amperf_fd(int cpu) -{ - assert(fd_amperf_percpu); - - if (fd_amperf_percpu[cpu].aperf) - return fd_amperf_percpu[cpu].aperf; - - fd_amperf_percpu[cpu] = open_amperf_fd(cpu); - - return fd_amperf_percpu[cpu].aperf; -} - -/* Read APERF, MPERF and TSC using the perf API. */ -static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) -{ - union { - struct { - unsigned long nr_entries; - unsigned long aperf; - unsigned long mperf; - }; - - unsigned long as_array[3]; - } cnt; - - const int fd_amperf = get_amperf_fd(cpu); - - /* - * Read the TSC with rdtsc, because we want the absolute value and not - * the offset from the start of the counter. - */ - t->tsc = rdtsc(); - - const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); - - if (n != sizeof(cnt.as_array)) - return -2; - - t->aperf = cnt.aperf * aperf_mperf_multiplier; - t->mperf = cnt.mperf * aperf_mperf_multiplier; - - return 0; -} - -/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */ -static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu) -{ - unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; - int aperf_mperf_retry_count = 0; - - /* - * The TSC, APERF and MPERF must be read together for - * APERF/MPERF and MPERF/TSC to give accurate results. - * - * Unfortunately, APERF and MPERF are read by - * individual system call, so delays may occur - * between them. If the time to read them - * varies by a large amount, we re-read them. - */ - - /* - * This initial dummy APERF read has been seen to - * reduce jitter in the subsequent reads. - */ - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - -retry: - t->tsc = rdtsc(); /* re-read close to APERF */ - - tsc_before = t->tsc; - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - - tsc_between = rdtsc(); - - if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) - return -4; - - tsc_after = rdtsc(); - - aperf_time = tsc_between - tsc_before; - mperf_time = tsc_after - tsc_between; - - /* - * If the system call latency to read APERF and MPERF - * differ by more than 2x, then try again. - */ - if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { - aperf_mperf_retry_count++; - if (aperf_mperf_retry_count < 5) - goto retry; - else - warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); - } - aperf_mperf_retry_count = 0; - - t->aperf = t->aperf * aperf_mperf_multiplier; - t->mperf = t->mperf * aperf_mperf_multiplier; - - return 0; -} - size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) { size_t ret = 0; for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) - if (rci->source[i] == RAPL_SOURCE_PERF) + if (rci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3593,7 +4115,7 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t size_t ret = 0; for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) - if (cci->source[i] == CSTATE_SOURCE_PERF) + if (cci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3611,7 +4133,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); assert(rapl_counter_info_perdomain); @@ -3634,14 +4156,14 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { - case RAPL_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case RAPL_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(rci->fd_perf != -1); - if (debug) + if (debug >= 2) fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); @@ -3650,8 +4172,8 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct ++pi; break; - case RAPL_SOURCE_MSR: - if (debug) + case COUNTER_SOURCE_MSR: + if (debug >= 2) fprintf(stderr, "Reading rapl counter via msr at %u\n", i); assert(!no_msr); @@ -3709,15 +4231,15 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat struct cstate_counter_info_t *cci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d\n", __func__, cpu); assert(ccstate_counter_info); assert(cpu <= ccstate_counter_info_size); - memset(perf_data, 0, sizeof(perf_data)); - memset(perf_data_core, 0, sizeof(perf_data_core)); - memset(perf_data_pkg, 0, sizeof(perf_data_pkg)); + ZERO_ARRAY(perf_data); + ZERO_ARRAY(perf_data_core); + ZERO_ARRAY(perf_data_pkg); cci = &ccstate_counter_info[cpu]; @@ -3772,30 +4294,28 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { switch (cci->source[i]) { - case CSTATE_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case CSTATE_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); - } cci->data[i] = perf_data[pi]; ++pi; break; - case CSTATE_SOURCE_MSR: + case COUNTER_SOURCE_MSR: assert(!no_msr); if (get_msr(cpu, cci->msr[i], &cci->data[i])) return -13 - i; - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); - } break; } @@ -3809,7 +4329,7 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat * when invoked for the thread sibling. */ #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ - if (cci->source[index] != CSTATE_SOURCE_NONE) \ + if (cci->source[index] != COUNTER_SOURCE_NONE) \ out_counter = cci->data[index]; \ } while (0) @@ -3833,6 +4353,135 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat return 0; } +size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) +{ + size_t ret = 0; + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) + if (mci->source[i] == COUNTER_SOURCE_PERF) + ++ret; + + return ret; +} + +int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) +{ + unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; + + struct msr_counter_info_t *mci; + + if (debug >= 2) + fprintf(stderr, "%s: cpu%d\n", __func__, cpu); + + assert(msr_counter_info); + assert(cpu <= msr_counter_info_size); + + mci = &msr_counter_info[cpu]; + + ZERO_ARRAY(perf_data); + ZERO_ARRAY(mci->data); + + if (mci->fd_perf != -1) { + const size_t num_perf_counters = msr_counter_info_count_perf(mci); + const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); + const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); + + if (actual_read_size != expected_read_size) + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, + actual_read_size); + } + + for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { + switch (mci->source[i]) { + case COUNTER_SOURCE_NONE: + break; + + case COUNTER_SOURCE_PERF: + assert(pi < ARRAY_SIZE(perf_data)); + assert(mci->fd_perf != -1); + + if (debug >= 2) + fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); + + mci->data[i] = perf_data[pi]; + + ++pi; + break; + + case COUNTER_SOURCE_MSR: + assert(!no_msr); + + if (get_msr(cpu, mci->msr[i], &mci->data[i])) + return -2 - i; + + mci->data[i] &= mci->msr_mask[i]; + + if (debug >= 2) + fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); + + break; + } + } + + BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); + t->aperf = mci->data[MSR_RCI_INDEX_APERF]; + t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; + t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; + + return 0; +} + +int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) +{ + unsigned int domain; + unsigned long long value; + int fd_counter; + + for (size_t i = 0; pp; ++i, pp = pp->next) { + domain = cpu_to_domain(pp, cpu); + assert(domain < pp->num_domains); + + fd_counter = pp->fd_perf_per_domain[domain]; + + if (fd_counter == -1) + continue; + + if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) + return 1; + + assert(i < out_size); + out[i] = value * pp->scale; + } + + return 0; +} + +unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) +{ + unsigned long mask; + + if (msb == 63) + mask = 0xffffffffffffffff; + else + mask = ((1 << (msb + 1)) - 1); + + mask -= (1 << lsb) - 1; + + return mask; +} + +unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) +{ + assert(domain_id < ppmt->num_domains); + + const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; + const unsigned long value = pmmio ? *pmmio : 0; + const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); + const unsigned long value_shift = ppmt->lsb; + + return (value & value_mask) >> value_shift; +} + /* * get_counters(...) * migrate to cpu @@ -3843,6 +4492,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int cpu = t->cpu_id; unsigned long long msr; struct msr_counter *mp; + struct pmt_counter *pp; int i; int status; @@ -3858,24 +4508,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) - || soft_c1_residency_display(BIC_Avg_MHz)) { - int status = -1; - - assert(!no_perf || !no_msr); - - switch (amperf_source) { - case AMPERF_SOURCE_PERF: - status = read_aperf_mperf_tsc_perf(t, cpu); - break; - case AMPERF_SOURCE_MSR: - status = read_aperf_mperf_tsc_msr(t, cpu); - break; - } - - if (status != 0) - return status; - } + get_smi_aperf_mperf(cpu, t); if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) @@ -3883,11 +4516,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (DO_BIC(BIC_SMI)) { - if (get_msr(cpu, MSR_SMI_COUNT, &msr)) - return -5; - t->smi_count = msr & 0xFFFFFFFF; - } get_cstate_counters(cpu, t, c, p); @@ -3896,6 +4524,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) + t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); + /* collect core counters only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) goto done; @@ -3934,6 +4568,12 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) + c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); + /* collect package counters only for 1st core in package */ if (!is_cpu_first_core_in_package(t, c, p)) goto done; @@ -4006,6 +4646,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_mp(cpu, mp, &p->counter[i], path)) return -10; } + + if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) + return -10; + + for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) + p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); + done: gettimeofday(&t->tv_end, (struct timezone *)NULL); @@ -4469,25 +5116,6 @@ void free_fd_percpu(void) fd_percpu = NULL; } -void free_fd_amperf_percpu(void) -{ - int i; - - if (!fd_amperf_percpu) - return; - - for (i = 0; i < topo.max_cpu_num + 1; ++i) { - if (fd_amperf_percpu[i].mperf != 0) - close(fd_amperf_percpu[i].mperf); - - if (fd_amperf_percpu[i].aperf != 0) - close(fd_amperf_percpu[i].aperf); - } - - free(fd_amperf_percpu); - fd_amperf_percpu = NULL; -} - void free_fd_instr_count_percpu(void) { if (!fd_instr_count_percpu) @@ -4522,6 +5150,21 @@ void free_fd_cstate(void) ccstate_counter_info_size = 0; } +void free_fd_msr(void) +{ + if (!msr_counter_info) + return; + + for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { + if (msr_counter_info[cpu].fd_perf != -1) + close(msr_counter_info[cpu].fd_perf); + } + + free(msr_counter_info); + msr_counter_info = NULL; + msr_counter_info_size = 0; +} + void free_fd_rapl_percpu(void) { if (!rapl_counter_info_perdomain) @@ -4539,6 +5182,36 @@ void free_fd_rapl_percpu(void) rapl_counter_info_perdomain_size = 0; } +void free_fd_added_perf_counters_(struct perf_counter_info *pp) +{ + if (!pp) + return; + + if (!pp->fd_perf_per_domain) + return; + + while (pp) { + for (size_t domain = 0; domain < pp->num_domains; ++domain) { + if (pp->fd_perf_per_domain[domain] != -1) { + close(pp->fd_perf_per_domain[domain]); + pp->fd_perf_per_domain[domain] = -1; + } + } + + free(pp->fd_perf_per_domain); + pp->fd_perf_per_domain = NULL; + + pp = pp->next; + } +} + +void free_fd_added_perf_counters(void) +{ + free_fd_added_perf_counters_(sys.perf_tp); + free_fd_added_perf_counters_(sys.perf_cp); + free_fd_added_perf_counters_(sys.perf_pp); +} + void free_all_buffers(void) { int i; @@ -4581,9 +5254,10 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); - free_fd_amperf_percpu(); + free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); + free_fd_added_perf_counters(); free(irq_column_2_cpu); free(irqs_per_cpu); @@ -4918,16 +5592,22 @@ static void update_effective_set(bool startup) } void linux_perf_init(void); +void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void added_perf_counters_init(void); +void pmt_init(void); void re_initialize(void) { free_all_buffers(); setup_all_buffers(false); linux_perf_init(); + msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); + pmt_init(); fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } @@ -5695,9 +6375,6 @@ static void probe_intel_uncore_frequency_cluster(void) if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) return; - if (quiet) - return; - for (uncore_max_id = 0;; ++uncore_max_id) { sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); @@ -5727,6 +6404,14 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/fabric_cluster_id", path_base); cluster_id = read_sysfs_int(path); + sprintf(path, "%s/current_freq_khz", path_base); + sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); + + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + + if (quiet) + continue; + sprintf(path, "%s/min_freq_khz", path_base); k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); @@ -5743,11 +6428,6 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); k = read_sysfs_int(path); fprintf(outf, " %d MHz\n", k / 1000); - - sprintf(path, "%s/current_freq_khz", path_base); - sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); } } @@ -6779,22 +7459,13 @@ static int has_instr_count_access(void) return has_access; } -bool is_aperf_access_required(void) -{ - return BIC_IS_ENABLED(BIC_Avg_MHz) - || BIC_IS_ENABLED(BIC_Busy) - || BIC_IS_ENABLED(BIC_Bzy_MHz) - || BIC_IS_ENABLED(BIC_IPC) - || BIC_IS_ENABLED(BIC_CPU_c1); -} - int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { if (no_perf) return -1; - const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name); + const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); if (scale == 0.0) return -1; @@ -6805,7 +7476,7 @@ int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struc return -1; const unsigned int rapl_type = read_perf_type(cai->perf_subsys); - const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); @@ -6826,7 +7497,7 @@ int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct { int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; @@ -6846,14 +7517,6 @@ void linux_perf_init(void) if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } - - const bool aperf_required = is_aperf_access_required(); - - if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) { - fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu)); - if (fd_amperf_percpu == NULL) - err(-1, "calloc fd_amperf_percpu"); - } } void rapl_perf_init(void) @@ -6875,7 +7538,7 @@ void rapl_perf_init(void) rci->fd_perf = -1; for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { rci->data[i] = 0; - rci->source[i] = RAPL_SOURCE_NONE; + rci->source[i] = COUNTER_SOURCE_NONE; } } @@ -6917,14 +7580,14 @@ void rapl_perf_init(void) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { - rci->source[cai->rci_index] = RAPL_SOURCE_PERF; + rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; rci->scale[cai->rci_index] = scale * cai->compat_scale; rci->unit[cai->rci_index] = unit; rci->flags[cai->rci_index] = cai->flags; /* Use MSR for this counter */ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { - rci->source[cai->rci_index] = RAPL_SOURCE_MSR; + rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; rci->msr[cai->rci_index] = cai->msr; rci->msr_mask[cai->rci_index] = cai->msr_mask; rci->msr_shift[cai->rci_index] = cai->msr_shift; @@ -6934,7 +7597,7 @@ void rapl_perf_init(void) } } - if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE) + if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) has_counter = 1; } @@ -6946,75 +7609,11 @@ void rapl_perf_init(void) free(domain_visited); } -static int has_amperf_access_via_msr(void) -{ - if (no_msr) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_APERF)) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_MPERF)) - return 0; - - return 1; -} - -static int has_amperf_access_via_perf(void) -{ - struct amperf_group_fd fds; - - /* - * Cache the last result, so we don't warn the user multiple times - * - * Negative means cached, no access - * Zero means not cached - * Positive means cached, has access - */ - static int has_access_cached; - - if (no_perf) - return 0; - - if (has_access_cached != 0) - return has_access_cached > 0; - - fds = open_amperf_fd(base_cpu); - has_access_cached = (fds.aperf != -1) && (fds.mperf != -1); - - if (fds.aperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "APERF perf counter", "--no-perf"); - else - close(fds.aperf); - - if (fds.mperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "MPERF perf counter", "--no-perf"); - else - close(fds.mperf); - - if (has_access_cached == 0) - has_access_cached = -1; - - return has_access_cached > 0; -} - -/* Check if we can access APERF and MPERF */ +/* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - if (!is_aperf_access_required()) - return 0; - - if (!no_msr && has_amperf_access_via_msr()) - return 1; - - if (!no_perf && has_amperf_access_via_perf()) - return 1; - - return 0; + return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) @@ -7039,7 +7638,7 @@ int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const s return -1; const unsigned int type = read_perf_type(cai->perf_subsys); - const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); @@ -7057,12 +7656,120 @@ int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const st { int ret = add_cstate_perf_counter_(cpu, cci, cai); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; } +int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + if (no_perf) + return -1; + + const unsigned int type = read_perf_type(cai->perf_subsys); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); + + const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); + + if (fd_counter == -1) + return -1; + + /* If it's the first counter opened, make it a group descriptor */ + if (cci->fd_perf == -1) + cci->fd_perf = fd_counter; + + return fd_counter; +} + +int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + int ret = add_msr_perf_counter_(cpu, cci, cai); + + if (debug) + fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); + + return ret; +} + +void msr_perf_init_(void) +{ + const int mci_num = topo.max_cpu_num + 1; + + msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); + if (!msr_counter_info) + err(1, "calloc msr_counter_info"); + msr_counter_info_size = mci_num; + + for (int cpu = 0; cpu < mci_num; ++cpu) + msr_counter_info[cpu].fd_perf = -1; + + for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { + + struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; + + cai->present = false; + + for (int cpu = 0; cpu < mci_num; ++cpu) { + + struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; + + if (cpu_is_not_allowed(cpu)) + continue; + + if (cai->needed) { + /* Use perf API for this counter */ + if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; + cai->present = true; + + /* User MSR for this counter */ + } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; + cci->msr[cai->rci_index] = cai->msr; + cci->msr_mask[cai->rci_index] = cai->msr_mask; + cai->present = true; + } + } + } + } +} + +/* Initialize data for reading perf counters from the MSR group. */ +void msr_perf_init(void) +{ + bool need_amperf = false, need_smi = false; + const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); + + need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) + || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; + + if (BIC_IS_ENABLED(BIC_SMI)) + need_smi = true; + + /* Enable needed counters */ + msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; + + msr_perf_init_(); + + const bool has_amperf = has_amperf_access(); + const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; + + has_aperf_access = has_amperf; + + if (has_amperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + BIC_PRESENT(BIC_SMI); + } + + if (has_smi) + BIC_PRESENT(BIC_SMI); +} + void cstate_perf_init_(bool soft_c1) { bool has_counter; @@ -7127,17 +7834,17 @@ void cstate_perf_init_(bool soft_c1) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { - cci->source[cai->rci_index] = CSTATE_SOURCE_PERF; + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit && probe_msr(cpu, cai->msr) == 0) { - cci->source[cai->rci_index] = CSTATE_SOURCE_MSR; + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } } - if (cci->source[cai->rci_index] != CSTATE_SOURCE_NONE) { + if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { has_counter = true; cores_visited[core_id] = true; pkg_visited[pkg_id] = true; @@ -7320,12 +8027,6 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); - if (has_aperf && has_amperf_access()) { - BIC_PRESENT(BIC_Avg_MHz); - BIC_PRESENT(BIC_Busy); - BIC_PRESENT(BIC_Bzy_MHz); - BIC_PRESENT(BIC_IPC); - } do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -7442,6 +8143,11 @@ static void counter_info_init(void) if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; } + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { + msr_counter_arch_infos[i].present = false; + msr_counter_arch_infos[i].needed = false; + } } void probe_pm_features(void) @@ -7817,100 +8523,446 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); } -static void set_amperf_source(void) +bool has_added_counters(void) { - amperf_source = AMPERF_SOURCE_PERF; + /* + * It only makes sense to call this after the command line is parsed, + * otherwise sys structure is not populated. + */ - const bool aperf_required = is_aperf_access_required(); + return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; +} - if (no_perf || !aperf_required || !has_amperf_access_via_perf()) - amperf_source = AMPERF_SOURCE_MSR; +void check_msr_access(void) +{ + check_dev_msr(); + check_msr_permission(); - if (quiet || !debug) - return; + if (no_msr) + bic_disable_msr_access(); +} - fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); +void check_perf_access(void) +{ + if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) + bic_enabled &= ~BIC_IPC; } -bool has_added_counters(void) +int added_perf_counters_init_(struct perf_counter_info *pinfo) +{ + size_t num_domains = 0; + unsigned int next_domain; + bool *domain_visited; + unsigned int perf_type, perf_config; + double perf_scale; + int fd_perf; + + if (!pinfo) + return 0; + + const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); + + domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); + + while (pinfo) { + switch (pinfo->scope) { + case SCOPE_CPU: + num_domains = topo.max_cpu_num + 1; + break; + + case SCOPE_CORE: + num_domains = topo.max_core_id + 1; + break; + + case SCOPE_PACKAGE: + num_domains = topo.max_package_id + 1; + break; + } + + /* Allocate buffer for file descriptor for each domain. */ + pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); + if (!pinfo->fd_perf_per_domain) + errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); + + for (size_t i = 0; i < num_domains; ++i) + pinfo->fd_perf_per_domain[i] = -1; + + pinfo->num_domains = num_domains; + pinfo->scale = 1.0; + + memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); + + for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { + + next_domain = cpu_to_domain(pinfo, cpu); + + assert(next_domain < num_domains); + + if (cpu_is_not_allowed(cpu)) + continue; + + if (domain_visited[next_domain]) + continue; + + perf_type = read_perf_type(pinfo->device); + if (perf_type == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "type"); + continue; + } + + perf_config = read_perf_config(pinfo->device, pinfo->event); + if (perf_config == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "config"); + continue; + } + + /* Scale is not required, some counters just don't have it. */ + perf_scale = read_perf_scale(pinfo->device, pinfo->event); + if (perf_scale == 0.0) + perf_scale = 1.0; + + fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); + if (fd_perf == -1) { + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", + __func__, pinfo->device, pinfo->event, cpu); + continue; + } + + domain_visited[next_domain] = 1; + pinfo->fd_perf_per_domain[next_domain] = fd_perf; + pinfo->scale = perf_scale; + + if (debug) + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", + pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + } + + pinfo = pinfo->next; + } + + free(domain_visited); + + return 0; +} + +void added_perf_counters_init(void) +{ + if (added_perf_counters_init_(sys.perf_tp)) + errx(1, "%s: %s", __func__, "thread"); + + if (added_perf_counters_init_(sys.perf_cp)) + errx(1, "%s: %s", __func__, "core"); + + if (added_perf_counters_init_(sys.perf_pp)) + errx(1, "%s: %s", __func__, "package"); +} + +int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) +{ + int fd_telem_info; + FILE *file_telem_info; + unsigned long value; + + fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); + if (fd_telem_info == -1) + return -1; + + file_telem_info = fdopen(fd_telem_info, "r"); + if (file_telem_info == NULL) { + close(fd_telem_info); + return -1; + } + + if (fscanf(file_telem_info, format, &value) != 1) { + fclose(file_telem_info); + return -1; + } + + fclose(file_telem_info); + + *output = value; + + return 0; +} + +struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) +{ + DIR *dirp; + struct dirent *entry; + struct stat st; + unsigned int telem_idx; + int fd_telem_dir, fd_pmt; + unsigned long guid, size, offset; + size_t mmap_size; + void *mmio; + struct pmt_mmio *ret = NULL; + + if (stat(SYSFS_TELEM_PATH, &st) == -1) + return NULL; + + dirp = opendir(SYSFS_TELEM_PATH); + if (dirp == NULL) + return NULL; + + for (;;) { + entry = readdir(dirp); + + if (entry == NULL) + break; + + if (strcmp(entry->d_name, ".") == 0) + continue; + + if (strcmp(entry->d_name, "..") == 0) + continue; + + if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) + continue; + + if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { + break; + } + + if (!S_ISDIR(st.st_mode)) + continue; + + fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); + if (fd_telem_dir == -1) { + break; + } + + if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { + close(fd_telem_dir); + break; + } + + if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { + close(fd_telem_dir); + break; + } + + if (guid != target_guid) { + close(fd_telem_dir); + continue; + } + + if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { + close(fd_telem_dir); + break; + } + + assert(offset == 0); + + fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); + if (fd_pmt == -1) + goto loop_cleanup_and_break; + + mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); + if (mmio != MAP_FAILED) { + + if (debug) + fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); + + ret = calloc(1, sizeof(*ret)); + + if (!ret) { + fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); + exit(1); + } + + ret->guid = guid; + ret->mmio_base = mmio; + ret->pmt_offset = offset; + ret->size = size; + + ret->next = pmt_mmios; + pmt_mmios = ret; + } + +loop_cleanup_and_break: + close(fd_pmt); + close(fd_telem_dir); + break; + } + + closedir(dirp); + + return ret; +} + +struct pmt_mmio *pmt_mmio_find(unsigned int guid) +{ + struct pmt_mmio *pmmio = pmt_mmios; + + while (pmmio) { + if (pmmio->guid == guid) + return pmmio; + + pmmio = pmmio->next; + } + + return NULL; +} + +void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) { + char *ret; + + /* Get base of mmaped PMT file. */ + ret = (char *)pmmio->mmio_base; + /* - * It only makes sense to call this after the command line is parsed, - * otherwise sys structure is not populated. + * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. + * It's not guaranteed that the mmaped memory begins with the telemetry data + * - we might have to apply the offset first. */ + ret += pmmio->pmt_offset; - return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; + /* Apply the counter offset to get the address to the mmaped counter. */ + ret += counter_offset; + + return ret; } -bool is_msr_access_required(void) +struct pmt_mmio *pmt_add_guid(unsigned int guid) { - if (no_msr) - return false; - - if (has_added_counters()) - return true; - - return BIC_IS_ENABLED(BIC_SMI) - || BIC_IS_ENABLED(BIC_CPU_c1) - || BIC_IS_ENABLED(BIC_CPU_c3) - || BIC_IS_ENABLED(BIC_CPU_c6) - || BIC_IS_ENABLED(BIC_CPU_c7) - || BIC_IS_ENABLED(BIC_Mod_c6) - || BIC_IS_ENABLED(BIC_CoreTmp) - || BIC_IS_ENABLED(BIC_Totl_c0) - || BIC_IS_ENABLED(BIC_Any_c0) - || BIC_IS_ENABLED(BIC_GFX_c0) - || BIC_IS_ENABLED(BIC_CPUGFX) - || BIC_IS_ENABLED(BIC_Pkgpc3) - || BIC_IS_ENABLED(BIC_Pkgpc6) - || BIC_IS_ENABLED(BIC_Pkgpc2) - || BIC_IS_ENABLED(BIC_Pkgpc7) - || BIC_IS_ENABLED(BIC_Pkgpc8) - || BIC_IS_ENABLED(BIC_Pkgpc9) - || BIC_IS_ENABLED(BIC_Pkgpc10) - /* TODO: Multiplex access with perf */ - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_PkgWatt) - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_GFXWatt) - || BIC_IS_ENABLED(BIC_RAMWatt) - || BIC_IS_ENABLED(BIC_Pkg_J) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_GFX_J) - || BIC_IS_ENABLED(BIC_RAM_J) - || BIC_IS_ENABLED(BIC_PKG__) - || BIC_IS_ENABLED(BIC_RAM__) - || BIC_IS_ENABLED(BIC_PkgTmp) - || (is_aperf_access_required() && !has_amperf_access_via_perf()); + struct pmt_mmio *ret; + + ret = pmt_mmio_find(guid); + if (!ret) + ret = pmt_mmio_open(guid); + + return ret; } -void check_msr_access(void) +enum pmt_open_mode { + PMT_OPEN_TRY, /* Open failure is not an error. */ + PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ +}; + +struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) { - if (!is_msr_access_required()) - no_msr = 1; + while (pcounter) { + if (strcmp(pcounter->name, name) == 0) + break; - check_dev_msr(); - check_msr_permission(); + pcounter = pcounter->next; + } - if (no_msr) - bic_disable_msr_access(); + return pcounter; } -void check_perf_access(void) +struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) { - const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); + switch (scope) { + case SCOPE_CPU: + return &sys.pmt_tp; + case SCOPE_CORE: + return &sys.pmt_cp; + case SCOPE_PACKAGE: + return &sys.pmt_pp; + } - if (no_perf || !intrcount_required || !has_instr_count_access()) - bic_enabled &= ~BIC_IPC; + __builtin_unreachable(); +} - const bool aperf_required = is_aperf_access_required(); +void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) +{ + /* Make sure the new domain fits. */ + if (domain_id >= pcounter->num_domains) + pmt_counter_resize(pcounter, domain_id + 1); - if (!aperf_required || !has_amperf_access()) { - bic_enabled &= ~BIC_Avg_MHz; - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; - bic_enabled &= ~BIC_IPC; + assert(pcounter->domains); + assert(domain_id < pcounter->num_domains); + + pcounter->domains[domain_id].pcounter = pmmio; +} + +int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, + unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, + enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) +{ + struct pmt_mmio *mmio; + struct pmt_counter *pcounter; + struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); + bool new_counter = false; + int conflict = 0; + + if (lsb > msb) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); + exit(1); + } + + if (msb >= 64) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); + exit(1); + } + + mmio = pmt_add_guid(guid); + if (!mmio) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); + exit(1); + } + + return 1; + } + + if (offset >= mmio->size) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); + exit(1); + } + + return 1; + } + + pcounter = pmt_find_counter(*pmt_root, name); + if (!pcounter) { + pcounter = calloc(1, sizeof(*pcounter)); + new_counter = true; + } + + if (new_counter) { + strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); + pcounter->type = type; + pcounter->scope = scope; + pcounter->lsb = lsb; + pcounter->msb = msb; + pcounter->format = format; + } else { + conflict += pcounter->type != type; + conflict += pcounter->scope != scope; + conflict += pcounter->lsb != lsb; + conflict += pcounter->msb != msb; + conflict += pcounter->format != format; + } + + if (conflict) { + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", + __func__, name); + exit(1); + } + + pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); + + if (new_counter) { + pcounter->next = *pmt_root; + *pmt_root = pcounter; + } + + return 0; +} + +void pmt_init(void) +{ + if (BIC_IS_ENABLED(BIC_Diec6)) { + pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, + PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, + 0, PMT_OPEN_TRY); } } @@ -7923,16 +8975,18 @@ void turbostat_init() process_cpuid(); counter_info_init(); probe_pm_features(); - set_amperf_source(); + msr_perf_init(); linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); + pmt_init(); for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); - if (DO_BIC(BIC_IPC)) - (void)get_instr_count_fd(base_cpu); + if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) + BIC_PRESENT(BIC_IPC); /* * If TSC tweak is needed, but couldn't get it, @@ -8017,7 +9071,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.05.10 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2024.07.26 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -8049,7 +9103,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) - printf("%s: %s %s\n", __func__, name, mp->name); + fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); if (!strncmp(name, mp->name, strlen(mp->name))) return mp; } @@ -8066,8 +9120,8 @@ int add_counter(unsigned int msr_num, char *path, char *name, errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); if (debug) - printf("%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", __func__, msr_num, - path, name, width, scope, type, format, flags, id); + fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", + __func__, msr_num, path, name, width, scope, type, format, flags, id); switch (scope) { @@ -8075,7 +9129,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.tp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { @@ -8087,7 +9141,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.cp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { @@ -8099,7 +9153,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.pp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { @@ -8116,6 +9170,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = calloc(1, sizeof(struct msr_counter)); if (msrp == NULL) err(-1, "calloc msr_counter"); + msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES - 1); msrp->width = width; @@ -8156,11 +9211,106 @@ int add_counter(unsigned int msr_num, char *path, char *name, return 0; } -void parse_add_command(char *add_command) +/* + * Initialize the fields used for identifying and opening the counter. + * + * Defer the initialization of any runtime buffers for actually reading + * the counters for when we initialize all perf counters, so we can later + * easily call re_initialize(). + */ +struct perf_counter_info *make_perf_counter_info(const char *perf_device, + const char *perf_event, + const char *name, + unsigned int width, + enum counter_scope scope, + enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + pinfo = calloc(1, sizeof(*pinfo)); + if (!pinfo) + errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); + + strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); + strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); + + strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); + pinfo->width = width; + pinfo->scope = scope; + pinfo->type = type; + pinfo->format = format; + + return pinfo; +} + +int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, + enum counter_scope scope, enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + switch (scope) { + case SCOPE_CPU: + if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { + warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_CORE: + if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { + warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_PACKAGE: + if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { + warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + } + + pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); + + if (!pinfo) + return -1; + + switch (scope) { + case SCOPE_CPU: + pinfo->next = sys.perf_tp; + sys.perf_tp = pinfo; + ++sys.added_thread_perf_counters; + break; + + case SCOPE_CORE: + pinfo->next = sys.perf_cp; + sys.perf_cp = pinfo; + ++sys.added_core_perf_counters; + break; + + case SCOPE_PACKAGE: + pinfo->next = sys.perf_pp; + sys.perf_pp = pinfo; + ++sys.added_package_perf_counters; + break; + } + + // FIXME: we might not have debug here yet + if (debug) + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", + __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + + return 0; +} + +void parse_add_command_msr(char *add_command) { int msr_num = 0; char *path = NULL; - char name_buffer[NAME_BYTES] = ""; + char perf_device[PERF_DEV_NAME_BYTES] = ""; + char perf_event[PERF_EVT_NAME_BYTES] = ""; + char name_buffer[PERF_NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; @@ -8175,6 +9325,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); + BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); + if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) + goto next; + if (*add_command == '/') { path = add_command; goto next; @@ -8222,7 +9377,8 @@ void parse_add_command(char *add_command) goto next; } - if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ + BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); + if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { char *eos; eos = strchr(name_buffer, ','); @@ -8239,21 +9395,33 @@ next: } } - if ((msr_num == 0) && (path == NULL)) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); + if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); fail++; } + /* Test for non-empty perf_device and perf_event */ + const bool is_perf_counter = perf_device[0] && perf_event[0]; + /* generate default column header */ if (*name_buffer == '\0') { - if (width == 32) - sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); - else - sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + if (is_perf_counter) { + snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); + } else { + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + } } - if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) - fail++; + if (is_perf_counter) { + if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) + fail++; + } else { + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) + fail++; + } if (fail) { help(); @@ -8261,6 +9429,195 @@ next: } } +bool starts_with(const char *str, const char *prefix) +{ + return strncmp(prefix, str, strlen(prefix)) == 0; +} + +void parse_add_command_pmt(char *add_command) +{ + char *name = NULL; + char *type_name = NULL; + char *format_name = NULL; + unsigned int offset; + unsigned int lsb; + unsigned int msb; + unsigned int guid; + unsigned int domain_id; + enum counter_scope scope = 0; + enum pmt_datatype type = PMT_TYPE_RAW; + enum counter_format format = FORMAT_RAW; + bool has_offset = false; + bool has_lsb = false; + bool has_msb = false; + bool has_format = true; /* Format has a default value. */ + bool has_guid = false; + bool has_scope = false; + bool has_type = true; /* Type has a default value. */ + + /* Consume the "pmt," prefix. */ + add_command = strchr(add_command, ','); + if (!add_command) { + help(); + exit(1); + } + ++add_command; + + while (add_command) { + if (starts_with(add_command, "name=")) { + name = add_command + strlen("name="); + goto next; + } + + if (starts_with(add_command, "type=")) { + type_name = add_command + strlen("type="); + goto next; + } + + if (starts_with(add_command, "domain=")) { + const size_t prefix_len = strlen("domain="); + + if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { + scope = SCOPE_CPU; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { + scope = SCOPE_CORE; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { + scope = SCOPE_PACKAGE; + has_scope = true; + } + + if (!has_scope) { + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", + __func__); + exit(1); + } + + goto next; + } + + if (starts_with(add_command, "format=")) { + format_name = add_command + strlen("format="); + goto next; + } + + if (sscanf(add_command, "offset=%u", &offset) == 1) { + has_offset = true; + goto next; + } + + if (sscanf(add_command, "lsb=%u", &lsb) == 1) { + has_lsb = true; + goto next; + } + + if (sscanf(add_command, "msb=%u", &msb) == 1) { + has_msb = true; + goto next; + } + + if (sscanf(add_command, "guid=%x", &guid) == 1) { + has_guid = true; + goto next; + } + +next: + add_command = strchr(add_command, ','); + if (add_command) { + *add_command = '\0'; + add_command++; + } + } + + if (!name) { + printf("%s: missing %s\n", __func__, "name"); + exit(1); + } + + if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { + printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); + exit(1); + } + + if (format_name) { + has_format = false; + + if (strcmp("raw", format_name) == 0) { + format = FORMAT_RAW; + has_format = true; + } + + if (strcmp("delta", format_name) == 0) { + format = FORMAT_DELTA; + has_format = true; + } + + if (!has_format) { + fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + exit(1); + } + } + + if (type_name) { + has_type = false; + + if (strcmp("raw", type_name) == 0) { + type = PMT_TYPE_RAW; + has_type = true; + } + + if (strcmp("txtal_time", type_name) == 0) { + type = PMT_TYPE_XTAL_TIME; + has_type = true; + } + + if (!has_type) { + printf("%s: invalid %s: %s\n", __func__, "type", type_name); + exit(1); + } + } + + if (!has_offset) { + printf("%s : missing %s\n", __func__, "offset"); + exit(1); + } + + if (!has_lsb) { + printf("%s: missing %s\n", __func__, "lsb"); + exit(1); + } + + if (!has_msb) { + printf("%s: missing %s\n", __func__, "msb"); + exit(1); + } + + if (!has_guid) { + printf("%s: missing %s\n", __func__, "guid"); + exit(1); + } + + if (!has_scope) { + printf("%s: missing %s\n", __func__, "scope"); + exit(1); + } + + if (lsb > msb) { + printf("%s: lsb > msb doesn't make sense\n", __func__); + exit(1); + } + + pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); +} + +void parse_add_command(char *add_command) +{ + if (strncmp(add_command, "pmt", strlen("pmt")) == 0) + return parse_add_command_pmt(add_command); + return parse_add_command_msr(add_command); +} + int is_deferred_add(char *name) { int i; @@ -8424,7 +9781,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; diff --git a/tools/rcu/rcu-updaters.sh b/tools/rcu/rcu-updaters.sh new file mode 100755 index 000000000000..4ef1397927bb --- /dev/null +++ b/tools/rcu/rcu-updaters.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Run bpftrace to obtain a histogram of the types of primitives used to +# initiate RCU grace periods. The count associated with rcu_gp_init() +# is the number of normal (non-expedited) grace periods. +# +# Usage: rcu-updaters.sh [ duration-in-seconds ] +# +# Note that not all kernel builds have all of these functions. In those +# that do not, this script will issue a diagnostic for each that is not +# found, but continue normally for the rest of the functions. + +duration=${1} +if test -n "${duration}" +then + exitclause='interval:s:'"${duration}"' { exit(); }' +else + echo 'Hit control-C to end sample and print results.' +fi +bpftrace -e 'kprobe:kvfree_call_rcu, + kprobe:call_rcu, + kprobe:call_rcu_tasks, + kprobe:call_rcu_tasks_rude, + kprobe:call_rcu_tasks_trace, + kprobe:call_srcu, + kprobe:rcu_barrier, + kprobe:rcu_barrier_tasks, + kprobe:rcu_barrier_tasks_rude, + kprobe:rcu_barrier_tasks_trace, + kprobe:srcu_barrier, + kprobe:synchronize_rcu, + kprobe:synchronize_rcu_expedited, + kprobe:synchronize_rcu_tasks, + kprobe:synchronize_rcu_tasks_rude, + kprobe:synchronize_rcu_tasks_trace, + kprobe:synchronize_srcu, + kprobe:synchronize_srcu_expedited, + kprobe:get_state_synchronize_rcu, + kprobe:get_state_synchronize_rcu_full, + kprobe:start_poll_synchronize_rcu, + kprobe:start_poll_synchronize_rcu_expedited, + kprobe:start_poll_synchronize_rcu_full, + kprobe:start_poll_synchronize_rcu_expedited_full, + kprobe:poll_state_synchronize_rcu, + kprobe:poll_state_synchronize_rcu_full, + kprobe:cond_synchronize_rcu, + kprobe:cond_synchronize_rcu_full, + kprobe:start_poll_synchronize_srcu, + kprobe:poll_state_synchronize_srcu, + kprobe:rcu_gp_init + { @counts[func] = count(); } '"${exitclause}" diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 3482248aa344..90d5afd52dd0 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + struct device *dev = &port->dev; if (!cxlhdm) return ERR_PTR(-ENOMEM); cxlhdm->port = port; + cxlhdm->interleave_mask = ~0U; + cxlhdm->iw_cap_mask = ~0UL; + dev_set_drvdata(dev, cxlhdm); return cxlhdm; } diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index eaf091a3d331..129f179b0ac5 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -385,19 +385,21 @@ struct cxl_test_gen_media { struct cxl_test_gen_media gen_media = { .id = CXL_EVENT_GEN_MEDIA_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_gen_media), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_gen_media), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x2000), + .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, + .transaction_type = CXL_GMER_TRANS_HOST_WRITE, + /* .validity_flags = <set below> */ + .channel = 1, + .rank = 30, }, - .phys_addr = cpu_to_le64(0x2000), - .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, - .transaction_type = CXL_GMER_TRANS_HOST_WRITE, - /* .validity_flags = <set below> */ - .channel = 1, - .rank = 30 }, }; @@ -409,18 +411,20 @@ struct cxl_test_dram { struct cxl_test_dram dram = { .id = CXL_EVENT_DRAM_UUID, .rec = { - .hdr = { - .length = sizeof(struct cxl_test_dram), - .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, - /* .handle = Set dynamically */ - .related_handle = cpu_to_le16(0), + .media_hdr = { + .hdr = { + .length = sizeof(struct cxl_test_dram), + .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED, + /* .handle = Set dynamically */ + .related_handle = cpu_to_le16(0), + }, + .phys_addr = cpu_to_le64(0x8000), + .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, + .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, + .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, + /* .validity_flags = <set below> */ + .channel = 1, }, - .phys_addr = cpu_to_le64(0x8000), - .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT, - .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR, - .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, - /* .validity_flags = <set below> */ - .channel = 1, .bank_group = 5, .bank = 2, .column = {0xDE, 0xAD}, @@ -474,11 +478,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds, static void cxl_mock_add_event_logs(struct mock_event_store *mes) { put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK, - &gen_media.rec.validity_flags); + &gen_media.rec.media_hdr.validity_flags); put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP | CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN, - &dram.rec.validity_flags); + &dram.rec.media_hdr.validity_flags); mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed); mes_add_event(mes, CXL_EVENT_TYPE_INFO, @@ -1131,27 +1135,28 @@ static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds) return (count >= poison_inject_dev_max); } -static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) +static int mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa) { + /* Return EBUSY to match the CXL driver handling */ if (mock_poison_dev_max_injected(cxlds)) { dev_dbg(cxlds->dev, "Device poison injection limit has been reached: %d\n", - MOCK_INJECT_DEV_MAX); - return false; + poison_inject_dev_max); + return -EBUSY; } for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) { if (!mock_poison_list[i].cxlds) { mock_poison_list[i].cxlds = cxlds; mock_poison_list[i].dpa = dpa; - return true; + return 0; } } dev_dbg(cxlds->dev, "Mock test poison injection limit has been reached: %d\n", MOCK_INJECT_TEST_MAX); - return false; + return -ENXIO; } static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa) @@ -1175,10 +1180,8 @@ static int mock_inject_poison(struct cxl_dev_state *cxlds, dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa); return 0; } - if (!mock_poison_add(cxlds, dpa)) - return -ENXIO; - return 0; + return mock_poison_add(cxlds, dpa); } static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa) diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 57bf2688edfd..67503089e6a0 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -15,12 +15,12 @@ static int memblock_initialization_check(void) PREFIX_PUSH(); ASSERT_NE(memblock.memory.regions, NULL); - ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.cnt, 0); ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS); ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0); ASSERT_NE(memblock.reserved.regions, NULL); - ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.cnt, 0); ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS); ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0); @@ -982,6 +982,262 @@ static int memblock_reserve_many_check(void) return 0; } + +/* + * A test that trying to reserve the 129th memory block at all locations. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as reserved.regions. + * + * 0 1 2 128 + * +-------+ +-------+ +-------+ +-------+ + * | 32K | | 32K | | 32K | ... | 32K | + * +-------+-------+-------+-------+-------+ +-------+ + * |<-32K->| |<-32K->| + * + */ +/* Keep the gap so these memory region will not be merged. */ +#define MEMORY_BASE(idx) (SZ_128K + (MEM_SIZE * 2) * (idx)) +static int memblock_reserve_all_locations_check(void) +{ + int i, skip; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_reserved_regions_size; + + PREFIX_PUSH(); + + /* Reserve the 129th memory block for all possible positions*/ + for (skip = 0; skip < INIT_MEMBLOCK_REGIONS + 1; skip++) { + reset_memblock_regions(); + memblock_allow_resize(); + + /* Add a valid memory region used by double_array(). */ + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + + for (i = 0; i < INIT_MEMBLOCK_REGIONS + 1; i++) { + if (i == skip) + continue; + + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(MEMORY_BASE(i), MEM_SIZE); + + if (i < skip) { + ASSERT_EQ(memblock.reserved.cnt, i + 1); + ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE); + } else { + ASSERT_EQ(memblock.reserved.cnt, i); + ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE); + } + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(MEMORY_BASE(skip), MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. + */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. + */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + } + + test_pass_pop(); + + return 0; +} + +/* + * A test that trying to reserve the 129th memory block at all locations. + * Expect to trigger memblock_double_array() to double the + * memblock.memory.max, find a new valid memory as reserved.regions. And make + * sure it doesn't conflict with the range we want to reserve. + * + * For example, we have 128 regions in reserved and now want to reserve + * the skipped one. Since reserved is full, memblock_double_array() would find + * an available range in memory for the new array. We intended to put two + * ranges in memory with one is the exact range of the skipped one. Before + * commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling + * reserved array"), the new array would sits in the skipped range which is a + * conflict. The expected new array should be allocated from memory.regions[0]. + * + * 0 1 + * memory +-------+ +-------+ + * | 32K | | 32K | + * +-------+ ------+-------+-------+-------+ + * |<-32K->|<-32K->|<-32K->| + * + * 0 skipped 127 + * reserved +-------+ ......... +-------+ + * | 32K | . 32K . ... | 32K | + * +-------+-------+-------+ +-------+ + * |<-32K->| + * ^ + * | + * | + * skipped one + */ +/* Keep the gap so these memory region will not be merged. */ +#define MEMORY_BASE_OFFSET(idx, offset) ((offset) + (MEM_SIZE * 2) * (idx)) +static int memblock_reserve_many_may_conflict_check(void) +{ + int i, skip; + void *orig_region; + struct region r = { + .base = SZ_16K, + .size = SZ_16K, + }; + phys_addr_t new_reserved_regions_size; + + /* + * 0 1 129 + * +---+ +---+ +---+ + * |32K| |32K| .. |32K| + * +---+ +---+ +---+ + * + * Pre-allocate the range for 129 memory block + one range for double + * memblock.reserved.regions at idx 0. + */ + dummy_physical_memory_init(); + phys_addr_t memory_base = dummy_physical_memory_base(); + phys_addr_t offset = PAGE_ALIGN(memory_base); + + PREFIX_PUSH(); + + /* Reserve the 129th memory block for all possible positions*/ + for (skip = 1; skip <= INIT_MEMBLOCK_REGIONS + 1; skip++) { + reset_memblock_regions(); + memblock_allow_resize(); + + reset_memblock_attributes(); + /* Add a valid memory region used by double_array(). */ + memblock_add(MEMORY_BASE_OFFSET(0, offset), MEM_SIZE); + /* + * Add a memory region which will be reserved as 129th memory + * region. This is not expected to be used by double_array(). + */ + memblock_add(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE); + + for (i = 1; i <= INIT_MEMBLOCK_REGIONS + 1; i++) { + if (i == skip) + continue; + + /* Reserve some fakes memory region to fulfill the memblock. */ + memblock_reserve(MEMORY_BASE_OFFSET(i, offset), MEM_SIZE); + + if (i < skip) { + ASSERT_EQ(memblock.reserved.cnt, i); + ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE); + } else { + ASSERT_EQ(memblock.reserved.cnt, i - 1); + ASSERT_EQ(memblock.reserved.total_size, (i - 1) * MEM_SIZE); + } + } + + orig_region = memblock.reserved.regions; + + /* This reserve the 129 memory_region, and makes it double array. */ + memblock_reserve(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE); + + /* + * This is the memory region size used by the doubled reserved.regions, + * and it has been reserved due to it has been used. The size is used to + * calculate the total_size that the memblock.reserved have now. + */ + new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) * + sizeof(struct memblock_region)); + /* + * The double_array() will find a free memory region as the new + * reserved.regions, and the used memory region will be reserved, so + * there will be one more region exist in the reserved memblock. And the + * one more reserved region's size is new_reserved_regions_size. + */ + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * The first reserved region is allocated for double array + * with the size of new_reserved_regions_size and the base to be + * MEMORY_BASE_OFFSET(0, offset) + SZ_32K - new_reserved_regions_size + */ + ASSERT_EQ(memblock.reserved.regions[0].base + memblock.reserved.regions[0].size, + MEMORY_BASE_OFFSET(0, offset) + SZ_32K); + ASSERT_EQ(memblock.reserved.regions[0].size, new_reserved_regions_size); + + /* + * Now memblock_double_array() works fine. Let's check after the + * double_array(), the memblock_reserve() still works as normal. + */ + memblock_reserve(r.base, r.size); + ASSERT_EQ(memblock.reserved.regions[0].base, r.base); + ASSERT_EQ(memblock.reserved.regions[0].size, r.size); + + ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3); + ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE + + new_reserved_regions_size + + r.size); + ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + } + + dummy_physical_memory_cleanup(); + + test_pass_pop(); + + return 0; +} + static int memblock_reserve_checks(void) { prefix_reset(); @@ -997,6 +1253,8 @@ static int memblock_reserve_checks(void) memblock_reserve_between_check(); memblock_reserve_near_max_check(); memblock_reserve_many_check(); + memblock_reserve_all_locations_check(); + memblock_reserve_many_may_conflict_check(); prefix_pop(); @@ -1295,7 +1553,7 @@ static int memblock_remove_only_region_check(void) ASSERT_EQ(rgn->base, 0); ASSERT_EQ(rgn->size, 0); - ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.cnt, 0); ASSERT_EQ(memblock.memory.total_size, 0); test_pass_pop(); @@ -1723,7 +1981,7 @@ static int memblock_free_only_region_check(void) ASSERT_EQ(rgn->base, 0); ASSERT_EQ(rgn->size, 0); - ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.cnt, 0); ASSERT_EQ(memblock.reserved.total_size, 0); test_pass_pop(); @@ -2129,6 +2387,53 @@ static int memblock_trim_memory_checks(void) return 0; } +static int memblock_overlaps_region_check(void) +{ + struct region r = { + .base = SZ_1G, + .size = SZ_4M + }; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_add(r.base, r.size); + + /* Far Away */ + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1M, SZ_1M)); + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_2G, SZ_1M)); + + /* Neighbor */ + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_1M)); + ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_4M, SZ_1M)); + + /* Partial Overlap */ + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_2M)); + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_2M, SZ_2M)); + + /* Totally Overlap */ + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G, SZ_4M)); + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_2M, SZ_8M)); + ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_1M, SZ_1M)); + + test_pass_pop(); + + return 0; +} + +static int memblock_overlaps_region_checks(void) +{ + prefix_reset(); + prefix_push("memblock_overlaps_region"); + test_print("Running memblock_overlaps_region tests...\n"); + + memblock_overlaps_region_check(); + + prefix_pop(); + + return 0; +} + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2138,6 +2443,7 @@ int memblock_basic_checks(void) memblock_free_checks(); memblock_bottom_up_checks(); memblock_trim_memory_checks(); + memblock_overlaps_region_checks(); return 0; } diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index f43b6f414983..3250c8e5124b 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -40,13 +40,13 @@ void reset_memblock_regions(void) { memset(memblock.memory.regions, 0, memblock.memory.cnt * sizeof(struct memblock_region)); - memblock.memory.cnt = 1; + memblock.memory.cnt = 0; memblock.memory.max = INIT_MEMBLOCK_REGIONS; memblock.memory.total_size = 0; memset(memblock.reserved.regions, 0, memblock.reserved.cnt * sizeof(struct memblock_region)); - memblock.reserved.cnt = 1; + memblock.reserved.cnt = 0; memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS; memblock.reserved.total_size = 0; } @@ -61,7 +61,7 @@ void reset_memblock_attributes(void) static inline void fill_memblock(void) { - memset(memory_block.base, 1, MEM_SIZE); + memset(memory_block.base, 1, PHYS_MEM_SIZE); } void setup_memblock(void) @@ -103,7 +103,7 @@ void setup_numa_memblock(const unsigned int node_fracs[]) void dummy_physical_memory_init(void) { - memory_block.base = malloc(MEM_SIZE); + memory_block.base = malloc(PHYS_MEM_SIZE); assert(memory_block.base); fill_memblock(); } diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index b5ec59aa62d7..e1138e06c903 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -12,6 +12,7 @@ #include <../selftests/kselftest.h> #define MEM_SIZE SZ_32K +#define PHYS_MEM_SIZE SZ_16M #define NUMA_NODES 8 #define INIT_MEMBLOCK_REGIONS 128 @@ -39,6 +40,9 @@ enum test_flags { assert((_expected) == (_seen)); \ } while (0) +#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen) +#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen) + /** * ASSERT_NE(): * Check the condition diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index ea956082e6a4..e4313726fae3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -407,4 +407,5 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *g } EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm); +MODULE_DESCRIPTION("NVDIMM unit test"); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index b438f3d053ee..892e990c034a 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -987,5 +987,6 @@ static __exit void ndtest_exit(void) module_init(ndtest_init); module_exit(ndtest_exit); +MODULE_DESCRIPTION("Test non-NFIT devices"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("IBM Corporation"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a61df347a33d..cfd4378e2129 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3382,5 +3382,6 @@ static __exit void nfit_test_exit(void) module_init(nfit_test_init); module_exit(nfit_test_exit); +MODULE_DESCRIPTION("Test ACPI NFIT devices"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 7527f738b4a1..d1acd7d58850 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -5,8 +5,8 @@ CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \ LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu TARGETS = main idr-test multiorder xarray maple -CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \ - slab.o maple.o +LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o +CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o maple.o $(LIBS) OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ iteration_check_2.o benchmark.o diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c deleted file mode 100644 index 66ec4a24a203..000000000000 --- a/tools/testing/radix-tree/bitmap.c +++ /dev/null @@ -1,23 +0,0 @@ -/* lib/bitmap.c pulls in at least two other files. */ - -#include <linux/bitmap.h> - -void bitmap_clear(unsigned long *map, unsigned int start, int len) -{ - unsigned long *p = map + BIT_WORD(start); - const unsigned int size = start + len; - int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); - unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); - - while (len - bits_to_clear >= 0) { - *p &= ~mask_to_clear; - len -= bits_to_clear; - bits_to_clear = BITS_PER_LONG; - mask_to_clear = ~0UL; - p++; - } - if (len) { - mask_to_clear &= BITMAP_LAST_WORD_MASK(size); - *p &= ~mask_to_clear; - } -} diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index ca24f6839d50..84b8c3c92c79 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -424,6 +424,7 @@ void idr_checks(void) #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) void ida_dump(struct ida *); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index f1caf4bcf937..cd1cf05503b4 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -19,6 +19,7 @@ #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c index f20e12cbbfd4..d0e53bff1eb6 100644 --- a/tools/testing/radix-tree/xarray.c +++ b/tools/testing/radix-tree/xarray.c @@ -10,6 +10,7 @@ #define module_init(x) #define module_exit(x) #define MODULE_AUTHOR(x) +#define MODULE_DESCRIPTION(X) #define MODULE_LICENSE(x) #define dump_stack() assert(0) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9039f3709aff..bc8fe9e8f7f2 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -13,7 +13,8 @@ TARGETS += core TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += damon -TARGETS += devices +TARGETS += devices/error_logs +TARGETS += devices/probe TARGETS += dmabuf-heaps TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice @@ -21,6 +22,7 @@ TARGETS += drivers/net TARGETS += drivers/net/bonding TARGETS += drivers/net/team TARGETS += drivers/net/virtio_net +TARGETS += drivers/platform/x86/intel/ifs TARGETS += dt TARGETS += efivarfs TARGETS += exec @@ -251,6 +253,7 @@ ifdef INSTALL_PATH install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/ktap_helpers.sh $(INSTALL_PATH)/kselftest/ + install -m 744 kselftest/ksft.py $(INSTALL_PATH)/kselftest/ install -m 744 run_kselftest.sh $(INSTALL_PATH)/ rm -f $(TEST_LIST) @ret=1; \ diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index 1c04e5f638a0..2a4b2662035e 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -33,6 +33,8 @@ struct card_data { snd_ctl_t *handle; int card; + snd_ctl_card_info_t *info; + const char *card_name; struct pollfd pollfd; int num_ctls; snd_ctl_elem_list_t *ctls; @@ -91,8 +93,26 @@ static void find_controls(void) err = snd_card_get_longname(card, &card_longname); if (err != 0) card_longname = "Unknown"; - ksft_print_msg("Card %d - %s (%s)\n", card, - card_name, card_longname); + + err = snd_ctl_card_info_malloc(&card_data->info); + if (err != 0) + ksft_exit_fail_msg("Failed to allocate card info: %d\n", + err); + + err = snd_ctl_card_info(card_data->handle, card_data->info); + if (err == 0) { + card_data->card_name = snd_ctl_card_info_get_id(card_data->info); + if (!card_data->card_name) + ksft_print_msg("Failed to get card ID\n"); + } else { + ksft_print_msg("Failed to get card info: %d\n", err); + } + + if (!card_data->card_name) + card_data->card_name = "Unknown"; + + ksft_print_msg("Card %d/%s - %s (%s)\n", card, + card_data->card_name, card_name, card_longname); /* Count controls */ snd_ctl_elem_list_malloc(&card_data->ctls); @@ -389,16 +409,16 @@ static void test_ctl_get_value(struct ctl_data *ctl) /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { ksft_print_msg("%s is inactive\n", ctl->name); - ksft_test_result_skip("get_value.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("get_value.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } /* Can't test reading on an unreadable control */ if (!snd_ctl_elem_info_is_readable(ctl->info)) { ksft_print_msg("%s is not readable\n", ctl->name); - ksft_test_result_skip("get_value.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("get_value.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } @@ -413,8 +433,8 @@ static void test_ctl_get_value(struct ctl_data *ctl) err = -EINVAL; out: - ksft_test_result(err >= 0, "get_value.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(err >= 0, "get_value.%s.%d\n", + ctl->card->card_name, ctl->elem); } static bool strend(const char *haystack, const char *needle) @@ -431,7 +451,7 @@ static void test_ctl_name(struct ctl_data *ctl) { bool name_ok = true; - ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem, + ksft_print_msg("%s.%d %s\n", ctl->card->card_name, ctl->elem, ctl->name); /* Only boolean controls should end in Switch */ @@ -453,8 +473,8 @@ static void test_ctl_name(struct ctl_data *ctl) } } - ksft_test_result(name_ok, "name.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(name_ok, "name.%s.%d\n", + ctl->card->card_name, ctl->elem); } static void show_values(struct ctl_data *ctl, snd_ctl_elem_value_t *orig_val, @@ -626,28 +646,41 @@ static int write_and_verify(struct ctl_data *ctl, } /* + * We can't verify any specific value for volatile controls + * but we should still check that whatever we read is a valid + * vale for the control. + */ + if (snd_ctl_elem_info_is_volatile(ctl->info)) { + if (!ctl_value_valid(ctl, read_val)) { + ksft_print_msg("Volatile control %s has invalid value\n", + ctl->name); + return -EINVAL; + } + + return 0; + } + + /* * Check for an event if the value changed, or confirm that * there was none if it didn't. We rely on the kernel * generating the notification before it returns from the * write, this is currently true, should that ever change this * will most likely break and need updating. */ - if (!snd_ctl_elem_info_is_volatile(ctl->info)) { - err = wait_for_event(ctl, 0); - if (snd_ctl_elem_value_compare(initial_val, read_val)) { - if (err < 1) { - ksft_print_msg("No event generated for %s\n", - ctl->name); - show_values(ctl, initial_val, read_val); - ctl->event_missing++; - } - } else { - if (err != 0) { - ksft_print_msg("Spurious event generated for %s\n", - ctl->name); - show_values(ctl, initial_val, read_val); - ctl->event_spurious++; - } + err = wait_for_event(ctl, 0); + if (snd_ctl_elem_value_compare(initial_val, read_val)) { + if (err < 1) { + ksft_print_msg("No event generated for %s\n", + ctl->name); + show_values(ctl, initial_val, read_val); + ctl->event_missing++; + } + } else { + if (err != 0) { + ksft_print_msg("Spurious event generated for %s\n", + ctl->name); + show_values(ctl, initial_val, read_val); + ctl->event_spurious++; } } @@ -682,30 +715,30 @@ static void test_ctl_write_default(struct ctl_data *ctl) /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { ksft_print_msg("%s is inactive\n", ctl->name); - ksft_test_result_skip("write_default.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_default.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } if (!snd_ctl_elem_info_is_writable(ctl->info)) { ksft_print_msg("%s is not writeable\n", ctl->name); - ksft_test_result_skip("write_default.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_default.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } /* No idea what the default was for unreadable controls */ if (!snd_ctl_elem_info_is_readable(ctl->info)) { ksft_print_msg("%s couldn't read default\n", ctl->name); - ksft_test_result_skip("write_default.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_default.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } err = write_and_verify(ctl, ctl->def_val, NULL); - ksft_test_result(err >= 0, "write_default.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(err >= 0, "write_default.%s.%d\n", + ctl->card->card_name, ctl->elem); } static bool test_ctl_write_valid_boolean(struct ctl_data *ctl) @@ -815,15 +848,15 @@ static void test_ctl_write_valid(struct ctl_data *ctl) /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { ksft_print_msg("%s is inactive\n", ctl->name); - ksft_test_result_skip("write_valid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_valid.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } if (!snd_ctl_elem_info_is_writable(ctl->info)) { ksft_print_msg("%s is not writeable\n", ctl->name); - ksft_test_result_skip("write_valid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_valid.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } @@ -846,16 +879,16 @@ static void test_ctl_write_valid(struct ctl_data *ctl) default: /* No tests for this yet */ - ksft_test_result_skip("write_valid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_valid.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } /* Restore the default value to minimise disruption */ write_and_verify(ctl, ctl->def_val, NULL); - ksft_test_result(pass, "write_valid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(pass, "write_valid.%s.%d\n", + ctl->card->card_name, ctl->elem); } static bool test_ctl_write_invalid_value(struct ctl_data *ctl, @@ -1027,15 +1060,15 @@ static void test_ctl_write_invalid(struct ctl_data *ctl) /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { ksft_print_msg("%s is inactive\n", ctl->name); - ksft_test_result_skip("write_invalid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_invalid.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } if (!snd_ctl_elem_info_is_writable(ctl->info)) { ksft_print_msg("%s is not writeable\n", ctl->name); - ksft_test_result_skip("write_invalid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_invalid.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } @@ -1058,28 +1091,28 @@ static void test_ctl_write_invalid(struct ctl_data *ctl) default: /* No tests for this yet */ - ksft_test_result_skip("write_invalid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result_skip("write_invalid.%s.%d\n", + ctl->card->card_name, ctl->elem); return; } /* Restore the default value to minimise disruption */ write_and_verify(ctl, ctl->def_val, NULL); - ksft_test_result(pass, "write_invalid.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(pass, "write_invalid.%s.%d\n", + ctl->card->card_name, ctl->elem); } static void test_ctl_event_missing(struct ctl_data *ctl) { - ksft_test_result(!ctl->event_missing, "event_missing.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(!ctl->event_missing, "event_missing.%s.%d\n", + ctl->card->card_name, ctl->elem); } static void test_ctl_event_spurious(struct ctl_data *ctl) { - ksft_test_result(!ctl->event_spurious, "event_spurious.%d.%d\n", - ctl->card->card, ctl->elem); + ksft_test_result(!ctl->event_spurious, "event_spurious.%s.%d\n", + ctl->card->card_name, ctl->elem); } int main(void) diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index de664dedb541..dbd7c222ce93 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -24,6 +24,8 @@ typedef struct timespec timestamp_t; struct card_data { int card; + snd_ctl_card_info_t *info; + const char *name; pthread_t thread; struct card_data *next; }; @@ -35,6 +37,7 @@ struct pcm_data { int card; int device; int subdevice; + const char *card_name; snd_pcm_stream_t stream; snd_config_t *pcm_config; struct pcm_data *next; @@ -167,6 +170,10 @@ static void find_pcms(void) config = get_alsalib_config(); while (card >= 0) { + card_data = calloc(1, sizeof(*card_data)); + if (!card_data) + ksft_exit_fail_msg("Out of memory\n"); + sprintf(name, "hw:%d", card); err = snd_ctl_open_lconf(&handle, name, 0, config); @@ -182,14 +189,29 @@ static void find_pcms(void) err = snd_card_get_longname(card, &card_longname); if (err != 0) card_longname = "Unknown"; - ksft_print_msg("Card %d - %s (%s)\n", card, - card_name, card_longname); + + err = snd_ctl_card_info_malloc(&card_data->info); + if (err != 0) + ksft_exit_fail_msg("Failed to allocate card info: %d\n", + err); + + err = snd_ctl_card_info(handle, card_data->info); + if (err == 0) { + card_data->name = snd_ctl_card_info_get_id(card_data->info); + if (!card_data->name) + ksft_print_msg("Failed to get card ID\n"); + } else { + ksft_print_msg("Failed to get card info: %d\n", err); + } + + if (!card_data->name) + card_data->name = "Unknown"; + + ksft_print_msg("Card %d/%s - %s (%s)\n", card, + card_data->name, card_name, card_longname); card_config = conf_by_card(card); - card_data = calloc(1, sizeof(*card_data)); - if (!card_data) - ksft_exit_fail_msg("Out of memory\n"); card_data->card = card; card_data->next = card_list; card_list = card_data; @@ -218,6 +240,10 @@ static void find_pcms(void) if (err < 0) ksft_exit_fail_msg("snd_ctl_pcm_info: %d:%d:%d\n", dev, 0, stream); + + ksft_print_msg("%s.0 - %s\n", card_data->name, + snd_pcm_info_get_id(pcm_info)); + count = snd_pcm_info_get_subdevices_count(pcm_info); for (subdev = 0; subdev < count; subdev++) { sprintf(key, "pcm.%d.%d.%s", dev, subdev, snd_pcm_stream_name(stream)); @@ -232,6 +258,7 @@ static void find_pcms(void) pcm_data->card = card; pcm_data->device = dev; pcm_data->subdevice = subdev; + pcm_data->card_name = card_data->name; pcm_data->stream = stream; pcm_data->pcm_config = conf_get_subtree(card_config, key, NULL); pcm_data->next = pcm_list; @@ -294,9 +321,9 @@ static void test_pcm_time(struct pcm_data *data, enum test_class class, desc = conf_get_string(pcm_cfg, "description", NULL, NULL); if (desc) - ksft_print_msg("%s.%s.%d.%d.%d.%s - %s\n", + ksft_print_msg("%s.%s.%s.%d.%d.%s - %s\n", test_class_name, test_name, - data->card, data->device, data->subdevice, + data->card_name, data->device, data->subdevice, snd_pcm_stream_name(data->stream), desc); @@ -352,9 +379,9 @@ __format: old_format = format; format = snd_pcm_format_value(alt_formats[i]); if (format != SND_PCM_FORMAT_UNKNOWN) { - ksft_print_msg("%s.%d.%d.%d.%s.%s format %s -> %s\n", + ksft_print_msg("%s.%s.%d.%d.%s.%s format %s -> %s\n", test_name, - data->card, data->device, data->subdevice, + data->card_name, data->device, data->subdevice, snd_pcm_stream_name(data->stream), snd_pcm_access_name(access), snd_pcm_format_name(old_format), @@ -383,7 +410,7 @@ __format: goto __close; } if (rrate != rate) { - snprintf(msg, sizeof(msg), "rate mismatch %ld != %d", rate, rrate); + snprintf(msg, sizeof(msg), "rate mismatch %ld != %u", rate, rrate); goto __close; } rperiod_size = period_size; @@ -430,9 +457,9 @@ __format: goto __close; } - ksft_print_msg("%s.%s.%d.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n", + ksft_print_msg("%s.%s.%s.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n", test_class_name, test_name, - data->card, data->device, data->subdevice, + data->card_name, data->device, data->subdevice, snd_pcm_stream_name(data->stream), snd_pcm_access_name(access), snd_pcm_format_name(format), @@ -491,9 +518,10 @@ __close: * Anything specified as specific to this system * should always be supported. */ - ksft_test_result(!skip, "%s.%s.%d.%d.%d.%s.params\n", + ksft_test_result(!skip, "%s.%s.%s.%d.%d.%s.params\n", test_class_name, test_name, - data->card, data->device, data->subdevice, + data->card_name, data->device, + data->subdevice, snd_pcm_stream_name(data->stream)); break; default: @@ -501,14 +529,16 @@ __close: } if (!skip) - ksft_test_result(pass, "%s.%s.%d.%d.%d.%s\n", + ksft_test_result(pass, "%s.%s.%s.%d.%d.%s\n", test_class_name, test_name, - data->card, data->device, data->subdevice, + data->card_name, data->device, + data->subdevice, snd_pcm_stream_name(data->stream)); else - ksft_test_result_skip("%s.%s.%d.%d.%d.%s\n", + ksft_test_result_skip("%s.%s.%s.%d.%d.%s\n", test_class_name, test_name, - data->card, data->device, data->subdevice, + data->card_name, data->device, + data->subdevice, snd_pcm_stream_name(data->stream)); if (msg[0]) @@ -609,8 +639,8 @@ int main(void) conf->filename, conf->config_id); for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) { - ksft_test_result(false, "test.missing.%d.%d.%d.%s\n", - pcm->card, pcm->device, pcm->subdevice, + ksft_test_result(false, "test.missing.%s.%d.%d.%s\n", + pcm->card_name, pcm->device, pcm->subdevice, snd_pcm_stream_name(pcm->stream)); } diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index abe4d58d731d..e4fa507cbdd0 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -47,7 +47,7 @@ static void test_tpidr(pid_t child) /* ...write a new value.. */ write_iov.iov_len = sizeof(uint64_t); - write_val[0] = read_val[0]++; + write_val[0] = read_val[0] + 1; ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov); ksft_test_result(ret == 0, "write_tpidr_one\n"); @@ -156,7 +156,7 @@ static void test_hw_debug(pid_t child, int type, const char *type_name) /* Zero is not currently architecturally valid */ ksft_test_result(arch, "%s_arch_set\n", type_name); } else { - ksft_test_result_skip("%s_arch_set\n"); + ksft_test_result_skip("%s_arch_set\n", type_name); } } diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index 00e52c966281..8362e7ec35ad 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -2,6 +2,7 @@ fp-pidbench fp-ptrace fp-stress fpsimd-test +kernel-test rdvl-sme rdvl-sve sve-probe-vls diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 55d4f00d9e8e..d171021e4cdd 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -12,6 +12,7 @@ TEST_GEN_PROGS := \ vec-syscfg \ za-fork za-ptrace TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ + kernel-test \ rdvl-sme rdvl-sve \ sve-test \ ssve-test \ diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index dd31647b00a2..faac24bdefeb 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -319,6 +319,19 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy) ksft_print_msg("Started %s\n", child->name); } +static void start_kernel(struct child_data *child, int cpu, int copy) +{ + int ret; + + ret = asprintf(&child->name, "KERNEL-%d-%d", cpu, copy); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + child_start(child, "./kernel-test"); + + ksft_print_msg("Started %s\n", child->name); +} + static void start_sve(struct child_data *child, int vl, int cpu) { int ret; @@ -438,7 +451,7 @@ int main(int argc, char **argv) int ret; int timeout = 10; int cpus, i, j, c; - int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + int sve_vl_count, sme_vl_count; bool all_children_started = false; int seen_children; int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; @@ -482,12 +495,7 @@ int main(int argc, char **argv) have_sme2 = false; } - /* Force context switching if we only have FPSIMD */ - if (!sve_vl_count && !sme_vl_count) - fpsimd_per_cpu = 2; - else - fpsimd_per_cpu = 1; - tests += cpus * fpsimd_per_cpu; + tests += cpus * 2; ksft_print_header(); ksft_set_plan(tests); @@ -542,8 +550,8 @@ int main(int argc, char **argv) tests); for (i = 0; i < cpus; i++) { - for (j = 0; j < fpsimd_per_cpu; j++) - start_fpsimd(&children[num_children++], i, j); + start_fpsimd(&children[num_children++], i, 0); + start_kernel(&children[num_children++], i, 0); for (j = 0; j < sve_vl_count; j++) start_sve(&children[num_children++], sve_vls[j], i); diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c new file mode 100644 index 000000000000..e8da3b4cbd23 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 ARM Limited. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> + +#include <sys/socket.h> + +#include <linux/kernel.h> +#include <linux/if_alg.h> + +#define DATA_SIZE (16 * 4096) + +static int base, sock; + +static int digest_len; +static char *ref; +static char *digest; +static char *alg_name; + +static struct iovec data_iov; +static int zerocopy[2]; +static int sigs; +static int iter; + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + printf("Terminated by signal %d, iterations=%d, signals=%d\n", + sig, iter, sigs); + exit(0); +} + +static void handle_kick_signal(int sig, siginfo_t *info, void *context) +{ + sigs++; +} + +static char *drivers[] = { + "crct10dif-arm64-ce", + /* "crct10dif-arm64-neon", - Same priority as generic */ + "sha1-ce", + "sha224-arm64", + "sha224-arm64-neon", + "sha224-ce", + "sha256-arm64", + "sha256-arm64-neon", + "sha256-ce", + "sha384-ce", + "sha512-ce", + "sha3-224-ce", + "sha3-256-ce", + "sha3-384-ce", + "sha3-512-ce", + "sm3-ce", + "sm3-neon", +}; + +static bool create_socket(void) +{ + FILE *proc; + struct sockaddr_alg addr; + char buf[1024]; + char *c, *driver_name; + bool is_shash, match; + int ret, i; + + ret = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (ret < 0) { + if (errno == EAFNOSUPPORT) { + printf("AF_ALG not supported\n"); + return false; + } + + printf("Failed to create AF_ALG socket: %s (%d)\n", + strerror(errno), errno); + return false; + } + base = ret; + + memset(&addr, 0, sizeof(addr)); + addr.salg_family = AF_ALG; + strncpy((char *)addr.salg_type, "hash", sizeof(addr.salg_type)); + + proc = fopen("/proc/crypto", "r"); + if (!proc) { + printf("Unable to open /proc/crypto\n"); + return false; + } + + driver_name = NULL; + is_shash = false; + match = false; + + /* Look through /proc/crypto for a driver with kernel mode FP usage */ + while (!match) { + c = fgets(buf, sizeof(buf), proc); + if (!c) { + if (feof(proc)) { + printf("Nothing found in /proc/crypto\n"); + return false; + } + continue; + } + + /* Algorithm descriptions are separated by a blank line */ + if (*c == '\n') { + if (is_shash && driver_name) { + for (i = 0; i < ARRAY_SIZE(drivers); i++) { + if (strcmp(drivers[i], + driver_name) == 0) { + match = true; + } + } + } + + if (!match) { + digest_len = 0; + + free(driver_name); + driver_name = NULL; + + free(alg_name); + alg_name = NULL; + + is_shash = false; + } + continue; + } + + /* Remove trailing newline */ + c = strchr(buf, '\n'); + if (c) + *c = '\0'; + + /* Find the field/value separator and start of the value */ + c = strchr(buf, ':'); + if (!c) + continue; + c += 2; + + if (strncmp(buf, "digestsize", strlen("digestsize")) == 0) + sscanf(c, "%d", &digest_len); + + if (strncmp(buf, "name", strlen("name")) == 0) + alg_name = strdup(c); + + if (strncmp(buf, "driver", strlen("driver")) == 0) + driver_name = strdup(c); + + if (strncmp(buf, "type", strlen("type")) == 0) + if (strncmp(c, "shash", strlen("shash")) == 0) + is_shash = true; + } + + strncpy((char *)addr.salg_name, alg_name, + sizeof(addr.salg_name) - 1); + + ret = bind(base, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + printf("Failed to bind %s: %s (%d)\n", + addr.salg_name, strerror(errno), errno); + return false; + } + + ret = accept(base, NULL, 0); + if (ret < 0) { + printf("Failed to accept %s: %s (%d)\n", + addr.salg_name, strerror(errno), errno); + return false; + } + + sock = ret; + + ret = pipe(zerocopy); + if (ret != 0) { + printf("Failed to create zerocopy pipe: %s (%d)\n", + strerror(errno), errno); + return false; + } + + ref = malloc(digest_len); + if (!ref) { + printf("Failed to allocated %d byte reference\n", digest_len); + return false; + } + + digest = malloc(digest_len); + if (!digest) { + printf("Failed to allocated %d byte digest\n", digest_len); + return false; + } + + return true; +} + +static bool compute_digest(void *buf) +{ + struct iovec iov; + int ret, wrote; + + iov = data_iov; + while (iov.iov_len) { + ret = vmsplice(zerocopy[1], &iov, 1, SPLICE_F_GIFT); + if (ret < 0) { + printf("Failed to send buffer: %s (%d)\n", + strerror(errno), errno); + return false; + } + + wrote = ret; + ret = splice(zerocopy[0], NULL, sock, NULL, wrote, 0); + if (ret < 0) { + printf("Failed to splice buffer: %s (%d)\n", + strerror(errno), errno); + } else if (ret != wrote) { + printf("Short splice: %d < %d\n", ret, wrote); + } + + iov.iov_len -= wrote; + iov.iov_base += wrote; + } + +reread: + ret = recv(sock, buf, digest_len, 0); + if (ret == 0) { + printf("No digest returned\n"); + return false; + } + if (ret != digest_len) { + if (errno == -EAGAIN) + goto reread; + printf("Failed to get digest: %s (%d)\n", + strerror(errno), errno); + return false; + } + + return true; +} + +int main(void) +{ + char *data; + struct sigaction sa; + int ret; + + /* Ensure we have unbuffered output */ + setvbuf(stdout, NULL, _IOLBF, 0); + + /* The parent will communicate with us via signals */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + + sa.sa_sigaction = handle_kick_signal; + ret = sigaction(SIGUSR2, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGUSR2 handler: %s (%d)\n", + strerror(errno), errno); + + data = malloc(DATA_SIZE); + if (!data) { + printf("Failed to allocate data buffer\n"); + return EXIT_FAILURE; + } + memset(data, 0, DATA_SIZE); + + data_iov.iov_base = data; + data_iov.iov_len = DATA_SIZE; + + /* + * If we can't create a socket assume it's a lack of system + * support and fall back to a basic FPSIMD test for the + * benefit of fp-stress. + */ + if (!create_socket()) { + execl("./fpsimd-test", "./fpsimd-test", NULL); + printf("Failed to fall back to fspimd-test: %d (%s)\n", + errno, strerror(errno)); + return EXIT_FAILURE; + } + + /* + * Compute a reference digest we hope is repeatable, we do + * this at runtime partly to make it easier to play with + * parameters. + */ + if (!compute_digest(ref)) { + printf("Failed to compute reference digest\n"); + return EXIT_FAILURE; + } + + printf("AF_ALG using %s\n", alg_name); + + while (true) { + if (!compute_digest(digest)) { + printf("Failed to compute digest, iter=%d\n", iter); + return EXIT_FAILURE; + } + + if (memcmp(ref, digest, digest_len) != 0) { + printf("Digest mismatch, iter=%d\n", iter); + return EXIT_FAILURE; + } + + iter++; + } + + return EXIT_FAILURE; +} diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile index 6d29cfde43a2..0a77f35295fb 100644 --- a/tools/testing/selftests/arm64/tags/Makefile +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -2,6 +2,5 @@ CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := tags_test -TEST_PROGS := run_tags_test.sh include ../../lib.mk diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh deleted file mode 100755 index 745f11379930..000000000000 --- a/tools/testing/selftests/arm64/tags/run_tags_test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -echo "--------------------" -echo "running tags test" -echo "--------------------" -./tags_test -if [ $? -ne 0 ]; then - echo "[FAIL]" -else - echo "[PASS]" -fi diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c index 955f87c1170d..8ae26e496c89 100644 --- a/tools/testing/selftests/arm64/tags/tags_test.c +++ b/tools/testing/selftests/arm64/tags/tags_test.c @@ -17,19 +17,21 @@ int main(void) static int tbi_enabled = 0; unsigned long tag = 0; struct utsname *ptr; - int err; + + ksft_print_header(); + ksft_set_plan(1); if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) tbi_enabled = 1; ptr = (struct utsname *)malloc(sizeof(*ptr)); if (!ptr) - ksft_exit_fail_msg("Failed to allocate utsname buffer\n"); + ksft_exit_fail_perror("Failed to allocate utsname buffer"); if (tbi_enabled) tag = 0x42; ptr = (struct utsname *)SET_TAG(ptr, tag); - err = uname(ptr); + ksft_test_result(!uname(ptr), "Syscall successful with tagged address\n"); free(ptr); - return err; + ksft_finished(); } diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 0445ac38bc07..901349da680f 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,11 +1,11 @@ bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 -fexit_sleep # The test never returns. The remaining tests cannot start. kprobe_multi_bench_attach # needs CONFIG_FPROBE kprobe_multi_test # needs CONFIG_FPROBE module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 +tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index c34adf39eeb2..3ebd77206f98 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,9 +1,5 @@ # TEMPORARY # Alphabetical order -exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) verifier_iterating_callbacks -verifier_arena # JIT does not support arena -arena_htab # JIT does not support arena -arena_atomics diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e0b3887b3d2d..dd49c1d23a60 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ - test_ringbuf_n.c test_ringbuf_map_key.c + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index 567491f3e1b5..68a51dcc0669 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -34,10 +34,12 @@ #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) #define __arena __attribute__((address_space(1))) +#define __arena_global __attribute__((address_space(1))) #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */ #else #define __arena +#define __arena_global SEC(".addr_space.1") #define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1) #define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0) #endif diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 3d9e4b8c6b81..828556cdc2f0 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -163,7 +163,7 @@ struct bpf_iter_task_vma; extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, struct task_struct *task, - unsigned long addr) __ksym; + __u64 addr) __ksym; extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym; extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym; @@ -351,6 +351,7 @@ l_true: \ l_continue:; \ }) #else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define can_loop \ ({ __label__ l_break, l_continue; \ bool ret = true; \ @@ -376,6 +377,33 @@ l_true: \ l_break: break; \ l_continue:; \ }) +#else +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + +#define cond_break \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: break; \ + l_continue:; \ + }) +#endif #endif #ifndef bpf_nop_mov @@ -524,7 +552,7 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *value), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index be91a6919315..3b6675ab4086 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -77,5 +77,5 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, struct bpf_key *trusted_keyring) __ksym; extern bool bpf_session_is_return(void) __ksym __weak; -extern long *bpf_session_cookie(void) __ksym __weak; +extern __u64 *bpf_session_cookie(void) __ksym __weak; #endif diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92..fd28c1157bd3 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -18,6 +18,7 @@ #include <linux/in6.h> #include <linux/un.h> #include <net/sock.h> +#include <linux/namei.h> #include "bpf_testmod.h" #include "bpf_testmod_kfunc.h" @@ -53,6 +54,13 @@ struct bpf_testmod_struct_arg_4 { int b; }; +struct bpf_testmod_struct_arg_5 { + char a; + short b; + int c; + long d; +}; + __bpf_hook_start(); noinline int @@ -111,6 +119,15 @@ bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e, } noinline int +bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, + short g, struct bpf_testmod_struct_arg_5 h, long i) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e + + f + g + h.a + h.b + h.c + h.d + i; + return bpf_testmod_test_struct_arg_result; +} + +noinline int bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { bpf_testmod_test_struct_arg_result = a->a; return bpf_testmod_test_struct_arg_result; @@ -154,6 +171,42 @@ __bpf_kfunc void bpf_kfunc_common_test(void) { } +__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, + struct bpf_dynptr *ptr__nullable) +{ +} + +__bpf_kfunc struct bpf_testmod_ctx * +bpf_testmod_ctx_create(int *err) +{ + struct bpf_testmod_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) { + *err = -ENOMEM; + return NULL; + } + refcount_set(&ctx->usage, 1); + + return ctx; +} + +static void testmod_free_cb(struct rcu_head *head) +{ + struct bpf_testmod_ctx *ctx; + + ctx = container_of(head, struct bpf_testmod_ctx, rcu); + kfree(ctx); +} + +__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) +{ + if (!ctx) + return; + if (refcount_dec_and_test(&ctx->usage)) + call_rcu(&ctx->rcu, testmod_free_cb); +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -269,6 +322,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3}; struct bpf_testmod_struct_arg_3 *struct_arg3; struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; + struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26}; int i = 1; while (bpf_testmod_return_ptr(i)) @@ -283,6 +337,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void *)20, struct_arg4); (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19, (void *)20, struct_arg4, 23); + (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20, + 21, 22, struct_arg5, 27); (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); @@ -358,13 +414,133 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; +/* bpf_testmod_uprobe sysfs attribute is so far enabled for x86_64 only, + * please see test_uretprobe_regs_change test + */ +#ifdef __x86_64__ + +static int +uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, + struct pt_regs *regs) + +{ + regs->ax = 0x12345678deadbeef; + regs->cx = 0x87654321feebdaed; + regs->r11 = (u64) -1; + return true; +} + +struct testmod_uprobe { + struct path path; + loff_t offset; + struct uprobe_consumer consumer; +}; + +static DEFINE_MUTEX(testmod_uprobe_mutex); + +static struct testmod_uprobe uprobe = { + .consumer.ret_handler = uprobe_ret_handler, +}; + +static int testmod_register_uprobe(loff_t offset) +{ + int err = -EBUSY; + + if (uprobe.offset) + return -EBUSY; + + mutex_lock(&testmod_uprobe_mutex); + + if (uprobe.offset) + goto out; + + err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path); + if (err) + goto out; + + err = uprobe_register_refctr(d_real_inode(uprobe.path.dentry), + offset, 0, &uprobe.consumer); + if (err) + path_put(&uprobe.path); + else + uprobe.offset = offset; + +out: + mutex_unlock(&testmod_uprobe_mutex); + return err; +} + +static void testmod_unregister_uprobe(void) +{ + mutex_lock(&testmod_uprobe_mutex); + + if (uprobe.offset) { + uprobe_unregister(d_real_inode(uprobe.path.dentry), + uprobe.offset, &uprobe.consumer); + uprobe.offset = 0; + } + + mutex_unlock(&testmod_uprobe_mutex); +} + +static ssize_t +bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t len) +{ + unsigned long offset = 0; + int err = 0; + + if (kstrtoul(buf, 0, &offset)) + return -EINVAL; + + if (offset) + err = testmod_register_uprobe(offset); + else + testmod_unregister_uprobe(); + + return err ?: strlen(buf); +} + +static struct bin_attribute bin_attr_bpf_testmod_uprobe_file __ro_after_init = { + .attr = { .name = "bpf_testmod_uprobe", .mode = 0666, }, + .write = bpf_testmod_uprobe_write, +}; + +static int register_bpf_testmod_uprobe(void) +{ + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file); +} + +static void unregister_bpf_testmod_uprobe(void) +{ + testmod_unregister_uprobe(); + sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file); +} + +#else +static int register_bpf_testmod_uprobe(void) +{ + return 0; +} + +static void unregister_bpf_testmod_uprobe(void) { } +#endif + BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_kfunc_common_test) +BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test) +BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) +BTF_ID_LIST(bpf_testmod_dtor_ids) +BTF_ID(struct, bpf_testmod_ctx) +BTF_ID(func, bpf_testmod_ctx_release) + static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .owner = THIS_MODULE, .set = &bpf_testmod_common_kfunc_ids, @@ -820,7 +996,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +1011,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +1047,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; @@ -898,6 +1074,12 @@ extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) { + const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = { + { + .btf_id = bpf_testmod_dtor_ids[0], + .kfunc_btf_id = bpf_testmod_dtor_ids[1] + }, + }; int ret; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); @@ -906,13 +1088,22 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); + ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, + ARRAY_SIZE(bpf_testmod_dtors), + THIS_MODULE); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) return -EINVAL; sock = NULL; mutex_init(&sock_lock); - return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + if (ret < 0) + return ret; + ret = register_bpf_testmod_uprobe(); + if (ret < 0) + return ret; + return 0; } static void bpf_testmod_exit(void) @@ -927,6 +1118,7 @@ static void bpf_testmod_exit(void) bpf_kfunc_close_sock(); sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); + unregister_bpf_testmod_uprobe(); } module_init(bpf_testmod_init); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index b0d586a6751f..e587a79f2239 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -80,6 +80,11 @@ struct sendmsg_args { int msglen; }; +struct bpf_testmod_ctx { + struct callback_head rcu; + refcount_t usage; +}; + struct prog_test_ref_kfunc * bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; @@ -134,4 +139,9 @@ int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym; int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym; int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym; +void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym; + +struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym; +void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym; + #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index eeabd798bc3a..4ca84c8d9116 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,9 +58,12 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_SKBMOD=y +CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=y @@ -80,8 +83,22 @@ CONFIG_NETFILTER_XT_TARGET_CT=y CONFIG_NETKIT=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_TABLES=y +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NETFILTER_INGRESS=y +CONFIG_NF_FLOW_TABLE=y +CONFIG_NF_FLOW_TABLE_INET=y +CONFIG_NETFILTER_NETLINK=y +CONFIG_NFT_FLOW_OFFLOAD=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 35250e6cde7f..e0cba4178e41 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -94,7 +94,8 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl if (settimeo(fd, opts->timeout_ms)) goto error_close; - if (opts->post_socket_cb && opts->post_socket_cb(fd, NULL)) { + if (opts->post_socket_cb && + opts->post_socket_cb(fd, opts->cb_opts)) { log_err("Failed to call post_socket_cb"); goto error_close; } @@ -105,7 +106,7 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl } if (type == SOCK_STREAM) { - if (listen(fd, 1) < 0) { + if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { log_err("Failed to listed on socket"); goto error_close; } @@ -118,22 +119,32 @@ error_close: return -1; } -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +int start_server_str(int family, int type, const char *addr_str, __u16 port, + const struct network_helper_opts *opts) { - struct network_helper_opts opts = { - .timeout_ms = timeout_ms, - }; struct sockaddr_storage addr; socklen_t addrlen; + if (!opts) + opts = &default_opts; + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) return -1; - return __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); + return __start_server(type, (struct sockaddr *)&addr, addrlen, opts); } -static int reuseport_cb(int fd, const struct post_socket_opts *opts) +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct network_helper_opts opts = { + .timeout_ms = timeout_ms, + }; + + return start_server_str(family, type, addr_str, port, &opts); +} + +static int reuseport_cb(int fd, void *opts) { int on = 1; @@ -238,6 +249,34 @@ error_close: return -1; } +int client_socket(int family, int type, + const struct network_helper_opts *opts) +{ + int fd; + + if (!opts) + opts = &default_opts; + + fd = socket(family, type, opts->proto); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, opts->timeout_ms)) + goto error_close; + + if (opts->post_socket_cb && + opts->post_socket_cb(fd, opts->cb_opts)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, socklen_t addrlen, const bool must_fail) @@ -273,15 +312,12 @@ int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t add if (!opts) opts = &default_opts; - fd = socket(addr->ss_family, type, opts->proto); + fd = client_socket(addr->ss_family, type, opts); if (fd < 0) { log_err("Failed to create client socket"); return -1; } - if (settimeo(fd, opts->timeout_ms)) - goto error_close; - if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail)) goto error_close; @@ -292,66 +328,21 @@ error_close: return -1; } -int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) +int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts) { struct sockaddr_storage addr; - struct sockaddr_in *addr_in; - socklen_t addrlen, optlen; - int fd, type, protocol; + socklen_t addrlen; if (!opts) opts = &default_opts; - optlen = sizeof(type); - - if (opts->type) { - type = opts->type; - } else { - if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { - log_err("getsockopt(SOL_TYPE)"); - return -1; - } - } - - if (opts->proto) { - protocol = opts->proto; - } else { - if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { - log_err("getsockopt(SOL_PROTOCOL)"); - return -1; - } - } - addrlen = sizeof(addr); if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { log_err("Failed to get server addr"); return -1; } - addr_in = (struct sockaddr_in *)&addr; - fd = socket(addr_in->sin_family, type, protocol); - if (fd < 0) { - log_err("Failed to create client socket"); - return -1; - } - - if (settimeo(fd, opts->timeout_ms)) - goto error_close; - - if (opts->cc && opts->cc[0] && - setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc, - strlen(opts->cc) + 1)) - goto error_close; - - if (!opts->noconnect) - if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) - goto error_close; - - return fd; - -error_close: - save_errno_close(fd); - return -1; + return connect_to_addr(type, &addr, addrlen, opts); } int connect_to_fd(int server_fd, int timeout_ms) @@ -359,8 +350,23 @@ int connect_to_fd(int server_fd, int timeout_ms) struct network_helper_opts opts = { .timeout_ms = timeout_ms, }; + int type, protocol; + socklen_t optlen; + + optlen = sizeof(type); + if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { + log_err("getsockopt(SOL_TYPE)"); + return -1; + } + + optlen = sizeof(protocol); + if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { + log_err("getsockopt(SOL_PROTOCOL)"); + return -1; + } + opts.proto = protocol; - return connect_to_fd_opts(server_fd, &opts); + return connect_to_fd_opts(server_fd, type, &opts); } int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 883c7ea9d8d5..aac5b94d6379 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -21,16 +21,22 @@ typedef __u16 __sum16; #define VIP_NUM 5 #define MAGIC_BYTES 123 -struct post_socket_opts {}; - struct network_helper_opts { - const char *cc; int timeout_ms; bool must_fail; - bool noconnect; - int type; int proto; - int (*post_socket_cb)(int fd, const struct post_socket_opts *opts); + /* +ve: Passed to listen() as-is. + * 0: Default when the test does not set + * a particular value during the struct init. + * It is changed to 1 before passing to listen(). + * Most tests only have one on-going connection. + * -ve: It is changed to 0 before passing to listen(). + * It is useful to force syncookie without + * changing the "tcp_syncookies" sysctl from 1 to 2. + */ + int backlog; + int (*post_socket_cb)(int fd, void *opts); + void *cb_opts; }; /* ipv4 test vector */ @@ -50,6 +56,8 @@ struct ipv6_packet { extern struct ipv6_packet pkt_v6; int settimeo(int fd, int timeout_ms); +int start_server_str(int family, int type, const char *addr_str, __u16 port, + const struct network_helper_opts *opts); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int *start_reuseport_server(int family, int type, const char *addr_str, @@ -58,10 +66,12 @@ int *start_reuseport_server(int family, int type, const char *addr_str, int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len, const struct network_helper_opts *opts); void free_fds(int *fds, unsigned int nr_close_fds); +int client_socket(int family, int type, + const struct network_helper_opts *opts); int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len, const struct network_helper_opts *opts); int connect_to_fd(int server_fd, int timeout_ms); -int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts); +int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c index 0807a48a58ee..26e7c06c6cb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c @@ -146,6 +146,22 @@ static void test_xchg(struct arena_atomics *skel) ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result"); } +static void test_uaf(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.uaf); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails"); +} + void test_arena_atomics(void) { struct arena_atomics *skel; @@ -180,6 +196,8 @@ void test_arena_atomics(void) test_cmpxchg(skel); if (test__start_subtest("xchg")) test_xchg(skel); + if (test__start_subtest("uaf")) + test_uaf(skel); cleanup: arena_atomics__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 4407ea428e77..070c52c312e5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -451,7 +451,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; - attr.sample_freq = 1000; + attr.sample_freq = 10000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index b30ff6b3b81a..a4a1f93878d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -104,6 +104,7 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); + ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0"); ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ"); ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP"); @@ -122,6 +123,12 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); + ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone"); + ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone"); + ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone"); + ASSERT_EQ(skel->bss->test_ct_zone_dir_enoent_lookup, -ENOENT, "Test ENOENT for lookup with wrong ct zone dir"); + ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone"); + end: if (client_fd != -1) close(client_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 0aca02532794..63422f4f3896 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -23,6 +23,11 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; +struct cb_opts { + const char *cc; + int map_fd; +}; + static int settcpca(int fd, const char *tcp_ca) { int err; @@ -34,55 +39,66 @@ static int settcpca(int fd, const char *tcp_ca) return 0; } -static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) +static bool start_test(char *addr_str, + const struct network_helper_opts *srv_opts, + const struct network_helper_opts *cli_opts, + int *srv_fd, int *cli_fd) { - int lfd = -1, fd = -1; - int err; + *srv_fd = start_server_str(AF_INET6, SOCK_STREAM, addr_str, 0, srv_opts); + if (!ASSERT_NEQ(*srv_fd, -1, "start_server_str")) + goto err; - lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); - if (!ASSERT_NEQ(lfd, -1, "socket")) - return; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (!ASSERT_NEQ(fd, -1, "socket")) { - close(lfd); - return; - } + /* connect to server */ + *cli_fd = connect_to_fd_opts(*srv_fd, SOCK_STREAM, cli_opts); + if (!ASSERT_NEQ(*cli_fd, -1, "connect_to_fd_opts")) + goto err; - if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca)) - goto done; + return true; - if (sk_stg_map) { - err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, - &expected_stg, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) - goto done; +err: + if (*srv_fd != -1) { + close(*srv_fd); + *srv_fd = -1; } + if (*cli_fd != -1) { + close(*cli_fd); + *cli_fd = -1; + } + return false; +} - /* connect to server */ - err = connect_fd_to_fd(fd, lfd, 0); - if (!ASSERT_NEQ(err, -1, "connect")) - goto done; - - if (sk_stg_map) { - int tmp_stg; +static void do_test(const struct network_helper_opts *opts) +{ + int lfd = -1, fd = -1; - err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, - &tmp_stg); - if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || - !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) - goto done; - } + if (!start_test(NULL, opts, opts, &lfd, &fd)) + goto done; ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); done: - close(lfd); - close(fd); + if (lfd != -1) + close(lfd); + if (fd != -1) + close(fd); +} + +static int cc_cb(int fd, void *opts) +{ + struct cb_opts *cb_opts = (struct cb_opts *)opts; + + return settcpca(fd, cb_opts->cc); } static void test_cubic(void) { + struct cb_opts cb_opts = { + .cc = "bpf_cubic", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_cubic *cubic_skel; struct bpf_link *link; @@ -96,7 +112,7 @@ static void test_cubic(void) return; } - do_test("bpf_cubic", NULL); + do_test(&opts); ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); @@ -104,8 +120,37 @@ static void test_cubic(void) bpf_cubic__destroy(cubic_skel); } +static int stg_post_socket_cb(int fd, void *opts) +{ + struct cb_opts *cb_opts = (struct cb_opts *)opts; + int err; + + err = settcpca(fd, cb_opts->cc); + if (err) + return err; + + err = bpf_map_update_elem(cb_opts->map_fd, &fd, + &expected_stg, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) + return err; + + return 0; +} + static void test_dctcp(void) { + struct cb_opts cb_opts = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; + struct network_helper_opts cli_opts = { + .post_socket_cb = stg_post_socket_cb, + .cb_opts = &cb_opts, + }; + int lfd = -1, fd = -1, tmp_stg, err; struct bpf_dctcp *dctcp_skel; struct bpf_link *link; @@ -119,11 +164,58 @@ static void test_dctcp(void) return; } - do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); + cb_opts.map_fd = bpf_map__fd(dctcp_skel->maps.sk_stg_map); + if (!start_test(NULL, &opts, &cli_opts, &lfd, &fd)) + goto done; + + err = bpf_map_lookup_elem(cb_opts.map_fd, &fd, &tmp_stg); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) + goto done; + + ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); +done: bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); + if (lfd != -1) + close(lfd); + if (fd != -1) + close(fd); +} + +static void test_dctcp_autoattach_map(void) +{ + struct cb_opts cb_opts = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; + struct bpf_dctcp *dctcp_skel; + struct bpf_link *link; + + dctcp_skel = bpf_dctcp__open_and_load(); + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) + return; + + bpf_map__set_autoattach(dctcp_skel->maps.dctcp, true); + bpf_map__set_autoattach(dctcp_skel->maps.dctcp_nouse, false); + + if (!ASSERT_OK(bpf_dctcp__attach(dctcp_skel), "bpf_dctcp__attach")) + goto destroy; + + /* struct_ops is auto-attached */ + link = dctcp_skel->links.dctcp; + if (!ASSERT_OK_PTR(link, "link")) + goto destroy; + + do_test(&opts); + +destroy: + bpf_dctcp__destroy(dctcp_skel); } static char *err_str; @@ -171,11 +263,22 @@ static void test_invalid_license(void) static void test_dctcp_fallback(void) { int err, lfd = -1, cli_fd = -1, srv_fd = -1; - struct network_helper_opts opts = { - .cc = "cubic", - }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link = NULL; + struct cb_opts dctcp = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts srv_opts = { + .post_socket_cb = cc_cb, + .cb_opts = &dctcp, + }; + struct cb_opts cubic = { + .cc = "cubic", + }; + struct network_helper_opts cli_opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cubic, + }; char srv_cc[16]; socklen_t cc_len = sizeof(srv_cc); @@ -190,13 +293,7 @@ static void test_dctcp_fallback(void) if (!ASSERT_OK_PTR(link, "dctcp link")) goto done; - lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (!ASSERT_GE(lfd, 0, "lfd") || - !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp")) - goto done; - - cli_fd = connect_to_fd_opts(lfd, &opts); - if (!ASSERT_GE(cli_fd, 0, "cli_fd")) + if (!start_test("::1", &srv_opts, &cli_opts, &lfd, &cli_fd)) goto done; srv_fd = accept(lfd, NULL, 0); @@ -297,6 +394,13 @@ static void test_unsupp_cong_op(void) static void test_update_ca(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link; int saved_ca1_cnt; @@ -307,25 +411,34 @@ static void test_update_ca(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; - do_test("tcp_ca_update", NULL); + do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_update_2); ASSERT_OK(err, "update_map"); - do_test("tcp_ca_update", NULL); + do_test(&opts); ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } static void test_update_wrong(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link; int saved_ca1_cnt; @@ -336,24 +449,33 @@ static void test_update_wrong(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; - do_test("tcp_ca_update", NULL); + do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_wrong); ASSERT_ERR(err, "update_map"); - do_test("tcp_ca_update", NULL); + do_test(&opts); ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } static void test_mixed_links(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link, *link_nl; int err; @@ -363,12 +485,13 @@ static void test_mixed_links(void) return; link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link); - ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"); + if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl")) + goto out; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test("tcp_ca_update", NULL); + do_test(&opts); ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_no_link); @@ -376,6 +499,7 @@ static void test_mixed_links(void) bpf_link__destroy(link); bpf_link__destroy(link_nl); +out: tcp_ca_update__destroy(skel); } @@ -418,7 +542,8 @@ static void test_link_replace(void) bpf_link__destroy(link); link = bpf_map__attach_struct_ops(skel->maps.ca_update_2); - ASSERT_OK_PTR(link, "attach_struct_ops_2nd"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd")) + goto out; /* BPF_F_REPLACE with a wrong old map Fd. It should fail! * @@ -441,6 +566,7 @@ static void test_link_replace(void) bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } @@ -455,6 +581,13 @@ static void test_tcp_ca_kfunc(void) static void test_cc_cubic(void) { + struct cb_opts cb_opts = { + .cc = "bpf_cc_cubic", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_cc_cubic *cc_cubic_skel; struct bpf_link *link; @@ -468,7 +601,7 @@ static void test_cc_cubic(void) return; } - do_test("bpf_cc_cubic", NULL); + do_test(&opts); bpf_link__destroy(link); bpf_cc_cubic__destroy(cc_cubic_skel); @@ -506,4 +639,6 @@ void test_bpf_tcp_ca(void) test_tcp_ca_kfunc(); if (test__start_subtest("cc_cubic")) test_cc_cubic(); + if (test__start_subtest("dctcp_autoattach_map")) + test_dctcp_autoattach_map(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 4c6ada5b270b..73f669014b69 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -45,12 +45,6 @@ err_out: return err; } -struct scale_test_def { - const char *file; - enum bpf_prog_type attach_type; - bool fails; -}; - static void scale_test(const char *file, enum bpf_prog_type attach_type, bool should_fail) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c new file mode 100644 index 000000000000..bfbe795823a2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include "btf_helpers.h" + +/* Fabricate base, split BTF with references to base types needed; then create + * split BTF with distilled base BTF and ensure expectations are met: + * - only referenced base types from split BTF are present + * - struct/union/enum are represented as empty unless anonymous, when they + * are represented in full in split BTF + */ +static void test_distilled_base(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 1); /* [2] ptr to int */ + btf__add_struct(btf1, "s1", 8); /* [3] struct s1 { */ + btf__add_field(btf1, "f1", 2, 0, 0); /* int *f1; */ + /* } */ + btf__add_struct(btf1, "", 12); /* [4] struct { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf1, "f2", 3, 32, 0); /* struct s1 f2; */ + /* } */ + btf__add_int(btf1, "unsigned int", 4, 0); /* [5] unsigned int */ + btf__add_union(btf1, "u1", 12); /* [6] union u1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf1, "f2", 2, 0, 0); /* int *f2; */ + /* } */ + btf__add_union(btf1, "", 4); /* [7] union { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_enum(btf1, "e1", 4); /* [8] enum e1 { */ + btf__add_enum_value(btf1, "v1", 1); /* v1 = 1; */ + /* } */ + btf__add_enum(btf1, "", 4); /* [9] enum { */ + btf__add_enum_value(btf1, "av1", 2); /* av1 = 2; */ + /* } */ + btf__add_enum64(btf1, "e641", 8, true); /* [10] enum64 { */ + btf__add_enum64_value(btf1, "v1", 1024); /* v1 = 1024; */ + /* } */ + btf__add_enum64(btf1, "", 8, true); /* [11] enum64 { */ + btf__add_enum64_value(btf1, "v1", 1025); /* v1 = 1025; */ + /* } */ + btf__add_struct(btf1, "unneeded", 4); /* [12] struct unneeded { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_struct(btf1, "embedded", 4); /* [13] struct embedded { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_func_proto(btf1, 1); /* [14] int (*)(int *p1); */ + btf__add_func_param(btf1, "p1", 1); + + btf__add_array(btf1, 1, 1, 3); /* [15] int [3]; */ + + btf__add_struct(btf1, "from_proto", 4); /* [16] struct from_proto { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_union(btf1, "u1", 4); /* [17] union u1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=8 vlen=1\n" + "\t'f1' type_id=2 bits_offset=0", + "[4] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=0", + "[7] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n" + "\t'v1' val=1", + "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1024", + "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[12] STRUCT 'unneeded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[13] STRUCT 'embedded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3", + "[16] STRUCT 'from_proto' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[17] UNION 'u1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_ptr(btf2, 3); /* [18] ptr to struct s1 */ + /* add ptr to struct anon */ + btf__add_ptr(btf2, 4); /* [19] ptr to struct (anon) */ + btf__add_const(btf2, 6); /* [20] const union u1 */ + btf__add_restrict(btf2, 7); /* [21] restrict union (anon) */ + btf__add_volatile(btf2, 8); /* [22] volatile enum e1 */ + btf__add_typedef(btf2, "et", 9); /* [23] typedef enum (anon) */ + btf__add_const(btf2, 10); /* [24] const enum64 e641 */ + btf__add_ptr(btf2, 11); /* [25] restrict enum64 (anon) */ + btf__add_struct(btf2, "with_embedded", 4); /* [26] struct with_embedded { */ + btf__add_field(btf2, "f1", 13, 0, 0); /* struct embedded f1; */ + /* } */ + btf__add_func(btf2, "fn", BTF_FUNC_STATIC, 14); /* [27] int fn(int p1); */ + btf__add_typedef(btf2, "arraytype", 15); /* [28] typedef int[3] foo; */ + btf__add_func_proto(btf2, 1); /* [29] int (*)(struct from proto p1); */ + btf__add_func_param(btf2, "p1", 16); + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=8 vlen=1\n" + "\t'f1' type_id=2 bits_offset=0", + "[4] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=0", + "[7] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n" + "\t'v1' val=1", + "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1024", + "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[12] STRUCT 'unneeded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[13] STRUCT 'embedded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3", + "[16] STRUCT 'from_proto' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[17] UNION 'u1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[18] PTR '(anon)' type_id=3", + "[19] PTR '(anon)' type_id=4", + "[20] CONST '(anon)' type_id=6", + "[21] RESTRICT '(anon)' type_id=7", + "[22] VOLATILE '(anon)' type_id=8", + "[23] TYPEDEF 'et' type_id=9", + "[24] CONST '(anon)' type_id=10", + "[25] PTR '(anon)' type_id=11", + "[26] STRUCT 'with_embedded' size=4 vlen=1\n" + "\t'f1' type_id=13 bits_offset=0", + "[27] FUNC 'fn' type_id=14 linkage=static", + "[28] TYPEDEF 'arraytype' type_id=15", + "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=16"); + + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(8, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's1' size=8 vlen=0", + "[3] UNION 'u1' size=12 vlen=0", + "[4] ENUM 'e1' encoding=UNSIGNED size=4 vlen=0", + "[5] ENUM 'e641' encoding=UNSIGNED size=8 vlen=0", + "[6] STRUCT 'embedded' size=4 vlen=0", + "[7] STRUCT 'from_proto' size=4 vlen=0", + /* split BTF; these types should match split BTF above from 17-28, with + * updated type id references + */ + "[8] PTR '(anon)' type_id=2", + "[9] PTR '(anon)' type_id=20", + "[10] CONST '(anon)' type_id=3", + "[11] RESTRICT '(anon)' type_id=21", + "[12] VOLATILE '(anon)' type_id=4", + "[13] TYPEDEF 'et' type_id=22", + "[14] CONST '(anon)' type_id=5", + "[15] PTR '(anon)' type_id=23", + "[16] STRUCT 'with_embedded' size=4 vlen=1\n" + "\t'f1' type_id=6 bits_offset=0", + "[17] FUNC 'fn' type_id=24 linkage=static", + "[18] TYPEDEF 'arraytype' type_id=25", + "[19] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=7", + /* split BTF types added from original base BTF below */ + "[20] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=32", + "[21] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[22] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[23] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[24] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[25] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3"); + + if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=8 vlen=1\n" + "\t'f1' type_id=2 bits_offset=0", + "[4] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=0", + "[7] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n" + "\t'v1' val=1", + "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1024", + "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[12] STRUCT 'unneeded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[13] STRUCT 'embedded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3", + "[16] STRUCT 'from_proto' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[17] UNION 'u1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[18] PTR '(anon)' type_id=3", + "[19] PTR '(anon)' type_id=30", + "[20] CONST '(anon)' type_id=6", + "[21] RESTRICT '(anon)' type_id=31", + "[22] VOLATILE '(anon)' type_id=8", + "[23] TYPEDEF 'et' type_id=32", + "[24] CONST '(anon)' type_id=10", + "[25] PTR '(anon)' type_id=33", + "[26] STRUCT 'with_embedded' size=4 vlen=1\n" + "\t'f1' type_id=13 bits_offset=0", + "[27] FUNC 'fn' type_id=34 linkage=static", + "[28] TYPEDEF 'arraytype' type_id=35", + "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=16", + /* below here are (duplicate) anon base types added by distill + * process to split BTF. + */ + "[30] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[31] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[32] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[33] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[34] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[35] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3"); + +cleanup: + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* ensure we can cope with multiple types with the same name in + * distilled base BTF. In this case because sizes are different, + * we can still disambiguate them. + */ +static void test_distilled_base_multi(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + btf__add_const(btf2, 2); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + +cleanup: + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* If a needed type is not present in the base BTF we wish to relocate + * with, btf__relocate() should error our. + */ +static void test_distilled_base_missing_err(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + btf__add_const(btf2, 2); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + btf5 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) + return; + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ + VALIDATE_RAW_BTF( + btf5, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split"); + +cleanup: + btf__free(btf5); + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* With 2 types of same size in distilled base BTF, relocation should + * fail as we have no means to choose between them. + */ +static void test_distilled_base_multi_err(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + btf__add_const(btf2, 2); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + ASSERT_EQ(btf__relocate(btf4, btf1), -EINVAL, "relocate_split"); +cleanup: + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* With 2 types of same size in base BTF, relocation should + * fail as we have no means to choose between them. + */ +static void test_distilled_base_multi_err2(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + btf5 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) + return; + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf5, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split"); +cleanup: + btf__free(btf5); + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* create split reference BTF from vmlinux + split BTF with a few type references; + * ensure the resultant split reference BTF is as expected, containing only types + * needed to disambiguate references from split BTF. + */ +static void test_distilled_base_vmlinux(void) +{ + struct btf *split_btf = NULL, *vmlinux_btf = btf__load_vmlinux_btf(); + struct btf *split_dist = NULL, *base_dist = NULL; + __s32 int_id, myint_id; + + if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux")) + return; + int_id = btf__find_by_name_kind(vmlinux_btf, "int", BTF_KIND_INT); + if (!ASSERT_GT(int_id, 0, "find_int")) + goto cleanup; + split_btf = btf__new_empty_split(vmlinux_btf); + if (!ASSERT_OK_PTR(split_btf, "new_split")) + goto cleanup; + myint_id = btf__add_typedef(split_btf, "myint", int_id); + btf__add_ptr(split_btf, myint_id); + + if (!ASSERT_EQ(btf__distill_base(split_btf, &base_dist, &split_dist), 0, + "distill_vmlinux_base")) + goto cleanup; + + if (!ASSERT_OK_PTR(split_dist, "split_distilled") || + !ASSERT_OK_PTR(base_dist, "base_dist")) + goto cleanup; + VALIDATE_RAW_BTF( + split_dist, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] TYPEDEF 'myint' type_id=1", + "[3] PTR '(anon)' type_id=2"); + +cleanup: + btf__free(split_dist); + btf__free(base_dist); + btf__free(split_btf); + btf__free(vmlinux_btf); +} + +void test_btf_distill(void) +{ + if (test__start_subtest("distilled_base")) + test_distilled_base(); + if (test__start_subtest("distilled_base_multi")) + test_distilled_base_multi(); + if (test__start_subtest("distilled_base_missing_err")) + test_distilled_base_missing_err(); + if (test__start_subtest("distilled_base_multi_err")) + test_distilled_base_multi_err(); + if (test__start_subtest("distilled_base_multi_err2")) + test_distilled_base_multi_err2(); + if (test__start_subtest("distilled_base_vmlinux")) + test_distilled_base_vmlinux(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c new file mode 100644 index 000000000000..32159d3eb281 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include "btf_helpers.h" +#include "bpf/libbpf_internal.h" + +struct field_data { + __u32 ids[5]; + const char *strs[5]; +} fields[] = { + { .ids = {}, .strs = {} }, + { .ids = {}, .strs = { "int" } }, + { .ids = {}, .strs = { "int64" } }, + { .ids = { 1 }, .strs = { "" } }, + { .ids = { 2, 1 }, .strs = { "" } }, + { .ids = { 3, 1 }, .strs = { "s1", "f1", "f2" } }, + { .ids = { 1, 5 }, .strs = { "u1", "f1", "f2" } }, + { .ids = {}, .strs = { "e1", "v1", "v2" } }, + { .ids = {}, .strs = { "fw1" } }, + { .ids = { 1 }, .strs = { "t" } }, + { .ids = { 2 }, .strs = { "" } }, + { .ids = { 1 }, .strs = { "" } }, + { .ids = { 3 }, .strs = { "" } }, + { .ids = { 1, 1, 3 }, .strs = { "", "p1", "p2" } }, + { .ids = { 13 }, .strs = { "func" } }, + { .ids = { 1 }, .strs = { "var1" } }, + { .ids = { 3 }, .strs = { "var2" } }, + { .ids = {}, .strs = { "float" } }, + { .ids = { 11 }, .strs = { "decltag" } }, + { .ids = { 6 }, .strs = { "typetag" } }, + { .ids = {}, .strs = { "e64", "eval1", "eval2", "eval3" } }, + { .ids = { 15, 16 }, .strs = { "datasec1" } } + +}; + +/* Fabricate BTF with various types and check BTF field iteration finds types, + * strings expected. + */ +void test_btf_field_iter(void) +{ + struct btf *btf = NULL; + int id; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + + btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf, "int64", 8, BTF_INT_SIGNED); /* [2] int64 */ + btf__add_ptr(btf, 1); /* [3] int * */ + btf__add_array(btf, 1, 2, 3); /* [4] int64[3] */ + btf__add_struct(btf, "s1", 12); /* [5] struct s1 { */ + btf__add_field(btf, "f1", 3, 0, 0); /* int *f1; */ + btf__add_field(btf, "f2", 1, 0, 0); /* int f2; */ + /* } */ + btf__add_union(btf, "u1", 12); /* [6] union u1 { */ + btf__add_field(btf, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf, "f2", 5, 0, 0); /* struct s1 f2; */ + /* } */ + btf__add_enum(btf, "e1", 4); /* [7] enum e1 { */ + btf__add_enum_value(btf, "v1", 1); /* v1 = 1; */ + btf__add_enum_value(btf, "v2", 2); /* v2 = 2; */ + /* } */ + + btf__add_fwd(btf, "fw1", BTF_FWD_STRUCT); /* [8] struct fw1; */ + btf__add_typedef(btf, "t", 1); /* [9] typedef int t; */ + btf__add_volatile(btf, 2); /* [10] volatile int64; */ + btf__add_const(btf, 1); /* [11] const int; */ + btf__add_restrict(btf, 3); /* [12] restrict int *; */ + btf__add_func_proto(btf, 1); /* [13] int (*)(int p1, int *p2); */ + btf__add_func_param(btf, "p1", 1); + btf__add_func_param(btf, "p2", 3); + + btf__add_func(btf, "func", BTF_FUNC_GLOBAL, 13);/* [14] int func(int p1, int *p2); */ + btf__add_var(btf, "var1", BTF_VAR_STATIC, 1); /* [15] static int var1; */ + btf__add_var(btf, "var2", BTF_VAR_STATIC, 3); /* [16] static int *var2; */ + btf__add_float(btf, "float", 4); /* [17] float; */ + btf__add_decl_tag(btf, "decltag", 11, -1); /* [18] decltag const int; */ + btf__add_type_tag(btf, "typetag", 6); /* [19] typetag union u1; */ + btf__add_enum64(btf, "e64", 8, true); /* [20] enum { */ + btf__add_enum64_value(btf, "eval1", 1000); /* eval1 = 1000, */ + btf__add_enum64_value(btf, "eval2", 2000); /* eval2 = 2000, */ + btf__add_enum64_value(btf, "eval3", 3000); /* eval3 = 3000 */ + /* } */ + btf__add_datasec(btf, "datasec1", 12); /* [21] datasec datasec1 */ + btf__add_datasec_var_info(btf, 15, 0, 4); + btf__add_datasec_var_info(btf, 16, 4, 8); + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int64' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=3", + "[5] STRUCT 's1' size=12 vlen=2\n" + "\t'f1' type_id=3 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=0", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=5 bits_offset=0", + "[7] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[8] FWD 'fw1' fwd_kind=struct", + "[9] TYPEDEF 't' type_id=1", + "[10] VOLATILE '(anon)' type_id=2", + "[11] CONST '(anon)' type_id=1", + "[12] RESTRICT '(anon)' type_id=3", + "[13] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=3", + "[14] FUNC 'func' type_id=13 linkage=global", + "[15] VAR 'var1' type_id=1, linkage=static", + "[16] VAR 'var2' type_id=3, linkage=static", + "[17] FLOAT 'float' size=4", + "[18] DECL_TAG 'decltag' type_id=11 component_idx=-1", + "[19] TYPE_TAG 'typetag' type_id=6", + "[20] ENUM64 'e64' encoding=SIGNED size=8 vlen=3\n" + "\t'eval1' val=1000\n" + "\t'eval2' val=2000\n" + "\t'eval3' val=3000", + "[21] DATASEC 'datasec1' size=12 vlen=2\n" + "\ttype_id=15 offset=0 size=4\n" + "\ttype_id=16 offset=4 size=8"); + + for (id = 1; id < btf__type_cnt(btf); id++) { + struct btf_type *t = btf_type_by_id(btf, id); + struct btf_field_iter it_strs, it_ids; + int str_idx = 0, id_idx = 0; + __u32 *next_str, *next_id; + + if (!ASSERT_OK_PTR(t, "btf_type_by_id")) + break; + if (!ASSERT_OK(btf_field_iter_init(&it_strs, t, BTF_FIELD_ITER_STRS), + "iter_init_strs")) + break; + if (!ASSERT_OK(btf_field_iter_init(&it_ids, t, BTF_FIELD_ITER_IDS), + "iter_init_ids")) + break; + while ((next_str = btf_field_iter_next(&it_strs))) { + const char *str = btf__str_by_offset(btf, *next_str); + + if (!ASSERT_OK(strcmp(fields[id].strs[str_idx], str), "field_str_match")) + break; + str_idx++; + } + /* ensure no more strings are expected */ + ASSERT_EQ(fields[id].strs[str_idx], NULL, "field_str_cnt"); + + while ((next_id = btf_field_iter_next(&it_ids))) { + if (!ASSERT_EQ(*next_id, fields[id].ids[id_idx], "field_id_match")) + break; + id_idx++; + } + /* ensure no more ids are expected */ + ASSERT_EQ(fields[id].ids[id_idx], 0, "field_id_cnt"); + } + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index addf720428f7..9709c8db7275 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -32,7 +32,7 @@ static int run_test(int cgroup_fd, int server_fd, bool classid) goto out; } - fd = connect_to_fd_opts(server_fd, &opts); + fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts); if (fd < 0) err = -1; else @@ -52,7 +52,7 @@ void test_cgroup_v1v2(void) server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) return; - client_fd = connect_to_fd_opts(server_fd, &opts); + client_fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts); if (!ASSERT_GE(client_fd, 0, "client_fd")) { close(server_fd); return; diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index ecf89df78109..2570bd4b0cb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,11 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_global_mask_array_one_rcu", + "test_global_mask_array_rcu", + "test_global_mask_array_l2_rcu", + "test_global_mask_nested_rcu", + "test_global_mask_nested_deep_rcu", "test_cpumask_weight", }; diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 3b7c57fe55a5..08b6391f2f56 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -69,15 +69,17 @@ static struct test_case test_cases[] = { { N(SCHED_CLS, struct __sk_buff, tstamp), .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "w11 &= 3;" - "if w11 != 0x3 goto pc+2;" + "if w11 & 0x4 goto pc+1;" + "goto pc+4;" + "if w11 & 0x3 goto pc+1;" + "goto pc+2;" "$dst = 0;" "goto pc+1;" "$dst = *(u64 *)($ctx + sk_buff::tstamp);", .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "if w11 & 0x2 goto pc+1;" + "if w11 & 0x4 goto pc+1;" "goto pc+2;" - "w11 &= -2;" + "w11 &= -4;" "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;" "*(u64 *)($ctx + sk_buff::tstamp) = $src;", }, diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index f949647dbbc2..552a0875ca6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -21,13 +21,13 @@ static int do_sleep(void *skel) } #define STACK_SIZE (1024 * 1024) -static char child_stack[STACK_SIZE]; void test_fexit_sleep(void) { struct fexit_sleep_lskel *fexit_skel = NULL; int wstatus, duration = 0; pid_t cpid; + char *child_stack = NULL; int err, fexit_cnt; fexit_skel = fexit_sleep_lskel__open_and_load(); @@ -38,6 +38,11 @@ void test_fexit_sleep(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto cleanup; + child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (!ASSERT_NEQ(child_stack, MAP_FAILED, "mmap")) + goto cleanup; + cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel); if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) goto cleanup; @@ -78,5 +83,6 @@ void test_fexit_sleep(void) goto cleanup; cleanup: + munmap(child_stack, STACK_SIZE); fexit_sleep_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 596536def43d..49b1ffc9af1f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -50,9 +50,9 @@ void serial_test_fexit_stress(void) out: for (i = 0; i < bpf_max_tramp_links; i++) { - if (link_fd[i]) + if (link_fd[i] > 0) close(link_fd[i]); - if (fexit_fd[i]) + if (fexit_fd[i] > 0) close(fexit_fd[i]); } free(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c index 5165b38f0e59..f7619e0ade10 100644 --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c @@ -29,8 +29,8 @@ static int open_pe(void) /* create perf event */ attr.size = sizeof(attr); - attr.type = PERF_TYPE_HARDWARE; - attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c index 284764e7179f..4ddb8a5fece8 100644 --- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c @@ -158,15 +158,13 @@ static int send_frags6(int client) void test_bpf_ip_check_defrag_ok(bool ipv6) { + int family = ipv6 ? AF_INET6 : AF_INET; struct network_helper_opts rx_opts = { .timeout_ms = 1000, - .noconnect = true, }; struct network_helper_opts tx_ops = { .timeout_ms = 1000, - .type = SOCK_RAW, .proto = IPPROTO_RAW, - .noconnect = true, }; struct sockaddr_storage caddr; struct ip_check_defrag *skel; @@ -192,7 +190,7 @@ void test_bpf_ip_check_defrag_ok(bool ipv6) nstoken = open_netns(NS1); if (!ASSERT_OK_PTR(nstoken, "setns ns1")) goto out; - srv_fd = start_server(ipv6 ? AF_INET6 : AF_INET, SOCK_DGRAM, NULL, SERVER_PORT, 0); + srv_fd = start_server(family, SOCK_DGRAM, NULL, SERVER_PORT, 0); close_netns(nstoken); if (!ASSERT_GE(srv_fd, 0, "start_server")) goto out; @@ -201,18 +199,18 @@ void test_bpf_ip_check_defrag_ok(bool ipv6) nstoken = open_netns(NS0); if (!ASSERT_OK_PTR(nstoken, "setns ns0")) goto out; - client_tx_fd = connect_to_fd_opts(srv_fd, &tx_ops); + client_tx_fd = client_socket(family, SOCK_RAW, &tx_ops); close_netns(nstoken); - if (!ASSERT_GE(client_tx_fd, 0, "connect_to_fd_opts")) + if (!ASSERT_GE(client_tx_fd, 0, "client_socket")) goto out; /* Open rx socket in ns0 */ nstoken = open_netns(NS0); if (!ASSERT_OK_PTR(nstoken, "setns ns0")) goto out; - client_rx_fd = connect_to_fd_opts(srv_fd, &rx_opts); + client_rx_fd = client_socket(family, SOCK_DGRAM, &rx_opts); close_netns(nstoken); - if (!ASSERT_GE(client_rx_fd, 0, "connect_to_fd_opts")) + if (!ASSERT_GE(client_rx_fd, 0, "client_socket")) goto out; /* Bind rx socket to a premeditated port */ diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 2eb71559713c..5b743212292f 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -78,6 +78,7 @@ static struct kfunc_test_params kfunc_tests[] = { SYSCALL_TEST(kfunc_syscall_test, 0), SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0), TC_TEST(kfunc_call_test_static_unused_arg, 0), + TC_TEST(kfunc_call_ctx, 0), }; struct syscall_test_args { diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c new file mode 100644 index 000000000000..c8f4dcaac7c7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2024 Meta Platforms, Inc */ + +#include <test_progs.h> +#include "test_kfunc_param_nullable.skel.h" + +void test_kfunc_param_nullable(void) +{ + RUN_TESTS(test_kfunc_param_nullable); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 2fb89de63bd2..77d07e0a4a55 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -183,6 +183,18 @@ static void test_linked_list_success(int mode, bool leave_in_map) if (!leave_in_map) clear_fields(skel->maps.bss_A); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts); + ASSERT_OK(ret, "global_list_push_pop_nested"); + ASSERT_OK(opts.retval, "global_list_push_pop_nested retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts); + ASSERT_OK(ret, "global_list_array_push_pop"); + ASSERT_OK(opts.retval, "global_list_array_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + if (mode == PUSH_POP) goto end; diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 274d2e033e39..d2ca32fa3b21 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -89,13 +89,8 @@ static int start_mptcp_server(int family, const char *addr_str, __u16 port, .timeout_ms = timeout_ms, .proto = IPPROTO_MPTCP, }; - struct sockaddr_storage addr; - socklen_t addrlen; - if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) - return -1; - - return start_server_addr(SOCK_STREAM, &addr, addrlen, &opts); + return start_server_str(family, SOCK_STREAM, addr_str, port, &opts); } static int verify_tsk(int map_fd, int client_fd) diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index e9300c96607d..9818f06c97c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -31,6 +31,28 @@ static void test_rbtree_add_nodes(void) rbtree__destroy(skel); } +static void test_rbtree_add_nodes_nested(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes_nested), &opts); + ASSERT_OK(ret, "rbtree_add_nodes_nested run"); + ASSERT_OK(opts.retval, "rbtree_add_nodes_nested retval"); + ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes_nested less_callback_ran"); + + rbtree__destroy(skel); +} + static void test_rbtree_add_and_remove(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -53,6 +75,27 @@ static void test_rbtree_add_and_remove(void) rbtree__destroy(skel); } +static void test_rbtree_add_and_remove_array(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove_array), &opts); + ASSERT_OK(ret, "rbtree_add_and_remove_array"); + ASSERT_OK(opts.retval, "rbtree_add_and_remove_array retval"); + + rbtree__destroy(skel); +} + static void test_rbtree_first_and_remove(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -104,8 +147,12 @@ void test_rbtree_success(void) { if (test__start_subtest("rbtree_add_nodes")) test_rbtree_add_nodes(); + if (test__start_subtest("rbtree_add_nodes_nested")) + test_rbtree_add_nodes_nested(); if (test__start_subtest("rbtree_add_and_remove")) test_rbtree_add_and_remove(); + if (test__start_subtest("rbtree_add_and_remove_array")) + test_rbtree_add_and_remove_array(); if (test__start_subtest("rbtree_first_and_remove")) test_rbtree_first_and_remove(); if (test__start_subtest("rbtree_api_release_aliasing")) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4c6f42dae409..da430df45aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,9 +12,11 @@ #include <sys/sysinfo.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> + #include "test_ringbuf.lskel.h" #include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" +#include "test_ringbuf_write.lskel.h" #define EDONE 7777 @@ -84,6 +86,58 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } +static void ringbuf_write_subtest(void) +{ + struct test_ringbuf_write_lskel *skel; + int page_size = getpagesize(); + size_t *mmap_ptr; + int err, rb_fd; + + skel = test_ringbuf_write_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->maps.ringbuf.max_entries = 0x4000; + + err = test_ringbuf_write_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + rb_fd = skel->maps.ringbuf.map_fd; + + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) + goto cleanup; + *mmap_ptr = 0x3000; + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new")) + goto cleanup; + + err = test_ringbuf_write_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup_ringbuf; + + skel->bss->discarded = 0; + skel->bss->passed = 0; + + /* trigger exactly two samples */ + syscall(__NR_getpgid); + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->discarded, 2, "discarded"); + ASSERT_EQ(skel->bss->passed, 0, "passed"); + + test_ringbuf_write_lskel__detach(skel); +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_write_lskel__destroy(skel); +} + static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); @@ -451,4 +505,6 @@ void test_ringbuf(void) ringbuf_n_subtest(); if (test__start_subtest("ringbuf_map_key")) ringbuf_map_key_subtest(); + if (test__start_subtest("ringbuf_write")) + ringbuf_write_subtest(); } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 920aee41bd58..6cc69900b310 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -156,7 +156,8 @@ static void test_send_signal_tracepoint(bool signal_thread) static void test_send_signal_perf(bool signal_thread) { struct perf_event_attr attr = { - .sample_period = 1, + .freq = 1, + .sample_freq = 1000, .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, }; diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 597d0467a926..ae87c00867ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -77,6 +77,12 @@ struct test { bool reuseport_has_conns; /* Add a connected socket to reuseport group */ }; +struct cb_opts { + int family; + int sotype; + bool reuseport; +}; + static __u32 duration; /* for CHECK macro */ static bool is_ipv6(const char *ip) @@ -142,19 +148,14 @@ static int make_socket(int sotype, const char *ip, int port, return fd; } -static int make_server(int sotype, const char *ip, int port, - struct bpf_program *reuseport_prog) +static int setsockopts(int fd, void *opts) { - struct sockaddr_storage addr = {0}; + struct cb_opts *co = (struct cb_opts *)opts; const int one = 1; - int err, fd = -1; - - fd = make_socket(sotype, ip, port, &addr); - if (fd < 0) - return -1; + int err = 0; /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */ - if (sotype == SOCK_DGRAM) { + if (co->sotype == SOCK_DGRAM) { err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) { @@ -163,7 +164,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) { + if (co->sotype == SOCK_DGRAM && co->family == AF_INET6) { err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) { @@ -172,7 +173,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (sotype == SOCK_STREAM) { + if (co->sotype == SOCK_STREAM) { err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) { @@ -181,7 +182,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (reuseport_prog) { + if (co->reuseport) { err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) { @@ -190,19 +191,28 @@ static int make_server(int sotype, const char *ip, int port, } } - err = bind(fd, (void *)&addr, inetaddr_len(&addr)); - if (CHECK(err, "bind", "failed\n")) { - log_err("failed to bind listen socket"); - goto fail; - } +fail: + return err; +} - if (sotype == SOCK_STREAM) { - err = listen(fd, SOMAXCONN); - if (CHECK(err, "make_server", "listen")) { - log_err("failed to listen on port %d", port); - goto fail; - } - } +static int make_server(int sotype, const char *ip, int port, + struct bpf_program *reuseport_prog) +{ + struct cb_opts cb_opts = { + .family = is_ipv6(ip) ? AF_INET6 : AF_INET, + .sotype = sotype, + .reuseport = reuseport_prog, + }; + struct network_helper_opts opts = { + .backlog = SOMAXCONN, + .post_socket_cb = setsockopts, + .cb_opts = &cb_opts, + }; + int err, fd; + + fd = start_server_str(cb_opts.family, sotype, ip, port, &opts); + if (!ASSERT_OK_FD(fd, "start_server_str")) + return -1; /* Late attach reuseport prog so we can have one init path */ if (reuseport_prog) { @@ -406,18 +416,12 @@ static int udp_recv_send(int server_fd) } /* Reply from original destination address. */ - fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0); - if (CHECK(fd < 0, "socket", "failed\n")) { + fd = start_server_addr(SOCK_DGRAM, dst_addr, sizeof(*dst_addr), NULL); + if (!ASSERT_OK_FD(fd, "start_server_addr")) { log_err("failed to create tx socket"); return -1; } - ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr)); - if (CHECK(ret, "bind", "failed\n")) { - log_err("failed to bind tx socket"); - goto out; - } - msg.msg_control = NULL; msg.msg_controllen = 0; n = sendmsg(fd, &msg, 0); @@ -629,9 +633,6 @@ static void run_lookup_prog(const struct test *t) * BPF socket lookup. */ if (t->reuseport_has_conns) { - struct sockaddr_storage addr = {}; - socklen_t len = sizeof(addr); - /* Add an extra socket to reuseport group */ reuse_conn_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, @@ -639,12 +640,9 @@ static void run_lookup_prog(const struct test *t) if (reuse_conn_fd < 0) goto close; - /* Connect the extra socket to itself */ - err = getsockname(reuse_conn_fd, (void *)&addr, &len); - if (CHECK(err, "getsockname", "errno %d\n", errno)) - goto close; - err = connect(reuse_conn_fd, (void *)&addr, len); - if (CHECK(err, "connect", "errno %d\n", errno)) + /* Connect the extra socket to itself */ + err = connect_fd_to_fd(reuse_conn_fd, reuse_conn_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) goto close; } @@ -994,7 +992,7 @@ static void drop_on_reuseport(const struct test *t) err = update_lookup_map(t->sock_map, SERVER_A, server1); if (err) - goto detach; + goto close_srv1; /* second server on destination address we should never reach */ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index e91b59366030..9ce0e0e0b7da 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -29,6 +29,8 @@ #include "sockmap_helpers.h" +#define NO_FLAGS 0 + static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused, int family, int sotype, int mapfd) { @@ -1376,7 +1378,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, int sock_mapfd, int nop_mapfd, - int verd_mapfd, enum redir_mode mode) + int verd_mapfd, enum redir_mode mode, + int send_flags) { const char *log_prefix = redir_mode_str(mode); unsigned int pass; @@ -1396,12 +1399,11 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, return; } - n = write(cli1, "a", 1); - if (n < 0) - FAIL_ERRNO("%s: write", log_prefix); - if (n == 0) - FAIL("%s: incomplete write", log_prefix); - if (n < 1) + /* Last byte is OOB data when send_flags has MSG_OOB bit set */ + n = xsend(cli1, "ab", 2, send_flags); + if (n >= 0 && n < 2) + FAIL("%s: incomplete send", log_prefix); + if (n < 2) return; key = SK_PASS; @@ -1416,6 +1418,25 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1, FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) FAIL("%s: incomplete recv", log_prefix); + + if (send_flags & MSG_OOB) { + /* Check that we can't read OOB while in sockmap */ + errno = 0; + n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); + if (n != -1 || errno != EOPNOTSUPP) + FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d", + log_prefix, n, errno); + + /* Remove peer1 from sockmap */ + xbpf_map_delete_elem(sock_mapfd, &(int){ 1 }); + + /* Check that OOB was dropped on redirect */ + errno = 0; + n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT); + if (n != -1 || errno != EINVAL) + FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d", + log_prefix, n, errno); + } } static void unix_redir_to_connected(int sotype, int sock_mapfd, @@ -1432,7 +1453,8 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, goto close0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, + mode, NO_FLAGS); xclose(c1); xclose(p1); @@ -1722,7 +1744,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (err) goto close_cli0; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, + mode, NO_FLAGS); xclose(c1); xclose(p1); @@ -1780,7 +1803,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (err) goto close; - pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, + mode, NO_FLAGS); xclose(c1); xclose(p1); @@ -1815,10 +1839,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } -static void unix_inet_redir_to_connected(int family, int type, - int sock_mapfd, int nop_mapfd, - int verd_mapfd, - enum redir_mode mode) +static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, + int nop_mapfd, int verd_mapfd, + enum redir_mode mode, int send_flags) { int c0, c1, p0, p1; int sfd[2]; @@ -1828,19 +1851,18 @@ static void unix_inet_redir_to_connected(int family, int type, if (err) return; - if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) goto close_cli0; c1 = sfd[0], p1 = sfd[1]; - pairs_redir_to_connected(c0, p0, c1, p1, - sock_mapfd, nop_mapfd, verd_mapfd, mode); + pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd, + verd_mapfd, mode, send_flags); xclose(c1); xclose(p1); close_cli0: xclose(c0); xclose(p0); - } static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, @@ -1859,31 +1881,42 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, skel->bss->test_ingress = false; unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, -1, verdict_map, - REDIR_EGRESS); + REDIR_EGRESS, NO_FLAGS); unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, -1, verdict_map, - REDIR_EGRESS); + REDIR_EGRESS, NO_FLAGS); unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, nop_map, verdict_map, - REDIR_EGRESS); + REDIR_EGRESS, NO_FLAGS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, + REDIR_EGRESS, NO_FLAGS); + + /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, nop_map, verdict_map, - REDIR_EGRESS); + REDIR_EGRESS, MSG_OOB); + skel->bss->test_ingress = true; unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, -1, verdict_map, - REDIR_INGRESS); + REDIR_INGRESS, NO_FLAGS); unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, -1, verdict_map, - REDIR_INGRESS); + REDIR_INGRESS, NO_FLAGS); unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, nop_map, verdict_map, - REDIR_INGRESS); + REDIR_INGRESS, NO_FLAGS); + unix_inet_redir_to_connected(family, SOCK_STREAM, + sock_map, nop_map, verdict_map, + REDIR_INGRESS, NO_FLAGS); + + /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, nop_map, verdict_map, - REDIR_INGRESS); + REDIR_INGRESS, MSG_OOB); xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 1d3a20f01b60..7cd8be2780ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -70,7 +70,7 @@ static void *server_thread(void *arg) return (void *)(long)err; } -static int custom_cb(int fd, const struct post_socket_opts *opts) +static int custom_cb(int fd, void *opts) { char buf; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index bc9841144685..1af9ec1149aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -9,6 +9,8 @@ #define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" #include "test_tc_link.skel.h" + +#include "netlink_helpers.h" #include "tc_helpers.h" void serial_test_tc_links_basic(void) @@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void) test_tc_links_ingress(BPF_TCX_INGRESS, false, false); } +struct qdisc_req { + struct nlmsghdr n; + struct tcmsg t; + char buf[1024]; +}; + +static int qdisc_replace(int ifindex, const char *kind, bool block) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct qdisc_req req; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWQDISC; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = 0xfffffff1; + + addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1); + if (block) + addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + +void serial_test_tc_links_dev_chain0(void) +{ + int err, ifindex; + + ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth"); + ifindex = if_nametoindex("foo"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + err = qdisc_replace(ifindex, "ingress", true); + if (!ASSERT_OK(err, "attaching ingress")) + goto cleanup; + ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block"); + err = qdisc_replace(ifindex, "clsact", false); + if (!ASSERT_OK(err, "attaching clsact")) + goto cleanup; + /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s + * triggered the issue without the fix reliably 100% of the time. + */ + sleep(5); + ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter"); +cleanup: + ASSERT_OK(system("ip link del dev foo"), "del veth"); + ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed"); + ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed"); +} + static void test_tc_links_dev_mixed(int target) { LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index b1073d36d77a..327d51f59142 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -890,9 +890,6 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); - /* non mono delivery time is not forwarded */ - ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c index ae93411fd582..09ca13bdf6ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c +++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c @@ -11,6 +11,7 @@ static int sanity_run(struct bpf_program *prog) .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .repeat = 1, + .flags = BPF_F_TEST_SKB_CHECKSUM_COMPLETE, ); prog_fd = bpf_program__fd(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 29e183a80f49..bbcf12696a6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -3,9 +3,12 @@ #include <test_progs.h> #include <time.h> +#include <sys/epoll.h> + #include "struct_ops_module.skel.h" #include "struct_ops_nulled_out_cb.skel.h" #include "struct_ops_forgotten_cb.skel.h" +#include "struct_ops_detach.skel.h" static void check_map_info(struct bpf_map_info *info) { @@ -242,6 +245,58 @@ cleanup: struct_ops_forgotten_cb__destroy(skel); } +/* Detach a link from a user space program */ +static void test_detach_link(void) +{ + struct epoll_event ev, events[2]; + struct struct_ops_detach *skel; + struct bpf_link *link = NULL; + int fd, epollfd = -1, nfds; + int err; + + skel = struct_ops_detach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + fd = bpf_link__fd(link); + if (!ASSERT_GE(fd, 0, "link_fd")) + goto cleanup; + + epollfd = epoll_create1(0); + if (!ASSERT_GE(epollfd, 0, "epoll_create1")) + goto cleanup; + + ev.events = EPOLLHUP; + ev.data.fd = fd; + err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev); + if (!ASSERT_OK(err, "epoll_ctl")) + goto cleanup; + + err = bpf_link__detach(link); + if (!ASSERT_OK(err, "detach_link")) + goto cleanup; + + /* Wait for EPOLLHUP */ + nfds = epoll_wait(epollfd, events, 2, 500); + if (!ASSERT_EQ(nfds, 1, "epoll_wait")) + goto cleanup; + + if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd")) + goto cleanup; + if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events")) + goto cleanup; + +cleanup: + if (epollfd >= 0) + close(epollfd); + bpf_link__destroy(link); + struct_ops_detach__destroy(skel); +} + void serial_test_struct_ops_module(void) { if (test__start_subtest("struct_ops_load")) @@ -254,5 +309,7 @@ void serial_test_struct_ops_module(void) test_struct_ops_nulled_out_cb(); if (test__start_subtest("struct_ops_forgotten_cb")) test_struct_ops_forgotten_cb(); + if (test__start_subtest("test_detach_link")) + test_detach_link(); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c new file mode 100644 index 000000000000..871d16cb95cf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <pthread.h> +#include <network_helpers.h> + +#include "timer_lockup.skel.h" + +static long cpu; +static int *timer1_err; +static int *timer2_err; +static bool skip; + +volatile int k = 0; + +static void *timer_lockup_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1000, + ); + int i, prog_fd = *(int *)arg; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), + &cpuset), + "cpu affinity"); + + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { + bpf_prog_test_run_opts(prog_fd, &opts); + /* Skip the test if we can't reproduce the race in a reasonable + * amount of time. + */ + if (i > 50) { + WRITE_ONCE(skip, true); + break; + } + } + + return NULL; +} + +void test_timer_lockup(void) +{ + int timer1_prog, timer2_prog; + struct timer_lockup *skel; + pthread_t thrds[2]; + void *ret; + + skel = timer_lockup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) + return; + + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); + + timer1_err = &skel->bss->timer1_err; + timer2_err = &skel->bss->timer2_err; + + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, + &timer1_prog), + "pthread_create thread1")) + goto out; + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, + &timer2_prog), + "pthread_create thread2")) { + pthread_exit(&thrds[0]); + goto out; + } + + pthread_join(thrds[1], &ret); + pthread_join(thrds[0], &ret); + + if (skip) { + test__skip(); + goto out; + } + + if (*timer1_err != -EDEADLK && *timer1_err != 0) + ASSERT_FAIL("timer1_err bad value"); + if (*timer2_err != -EDEADLK && *timer2_err != 0) + ASSERT_FAIL("timer2_err bad value"); +out: + timer_lockup__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index fe0fb0c9849a..19e68d4b3532 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -3,8 +3,9 @@ #include <test_progs.h> #include "tracing_struct.skel.h" +#include "tracing_struct_many_args.skel.h" -static void test_fentry(void) +static void test_struct_args(void) { struct tracing_struct *skel; int err; @@ -55,6 +56,25 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t6, 1, "t6 ret"); +destroy_skel: + tracing_struct__destroy(skel); +} + +static void test_struct_many_args(void) +{ + struct tracing_struct_many_args *skel; + int err; + + skel = tracing_struct_many_args__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_struct_many_args__open_and_load")) + return; + + err = tracing_struct_many_args__attach(skel); + if (!ASSERT_OK(err, "tracing_struct_many_args__attach")) + goto destroy_skel; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + ASSERT_EQ(skel->bss->t7_a, 16, "t7:a"); ASSERT_EQ(skel->bss->t7_b, 17, "t7:b"); ASSERT_EQ(skel->bss->t7_c, 18, "t7:c"); @@ -74,12 +94,28 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t8_g, 23, "t8:g"); ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret"); - tracing_struct__detach(skel); + ASSERT_EQ(skel->bss->t9_a, 16, "t9:a"); + ASSERT_EQ(skel->bss->t9_b, 17, "t9:b"); + ASSERT_EQ(skel->bss->t9_c, 18, "t9:c"); + ASSERT_EQ(skel->bss->t9_d, 19, "t9:d"); + ASSERT_EQ(skel->bss->t9_e, 20, "t9:e"); + ASSERT_EQ(skel->bss->t9_f, 21, "t9:f"); + ASSERT_EQ(skel->bss->t9_g, 22, "t9:f"); + ASSERT_EQ(skel->bss->t9_h_a, 23, "t9:h.a"); + ASSERT_EQ(skel->bss->t9_h_b, 24, "t9:h.b"); + ASSERT_EQ(skel->bss->t9_h_c, 25, "t9:h.c"); + ASSERT_EQ(skel->bss->t9_h_d, 26, "t9:h.d"); + ASSERT_EQ(skel->bss->t9_i, 27, "t9:i"); + ASSERT_EQ(skel->bss->t9_ret, 258, "t9 ret"); + destroy_skel: - tracing_struct__destroy(skel); + tracing_struct_many_args__destroy(skel); } void test_tracing_struct(void) { - test_fentry(); + if (test__start_subtest("struct_args")) + test_struct_args(); + if (test__start_subtest("struct_many_args")) + test_struct_many_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c new file mode 100644 index 000000000000..bd8c75b620c2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#ifdef __x86_64__ + +#include <unistd.h> +#include <asm/ptrace.h> +#include <linux/compiler.h> +#include <linux/stringify.h> +#include <sys/wait.h> +#include <sys/syscall.h> +#include <sys/prctl.h> +#include <asm/prctl.h> +#include "uprobe_syscall.skel.h" +#include "uprobe_syscall_executed.skel.h" + +__naked unsigned long uretprobe_regs_trigger(void) +{ + asm volatile ( + "movq $0xdeadbeef, %rax\n" + "ret\n" + ); +} + +__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after) +{ + asm volatile ( + "movq %r15, 0(%rdi)\n" + "movq %r14, 8(%rdi)\n" + "movq %r13, 16(%rdi)\n" + "movq %r12, 24(%rdi)\n" + "movq %rbp, 32(%rdi)\n" + "movq %rbx, 40(%rdi)\n" + "movq %r11, 48(%rdi)\n" + "movq %r10, 56(%rdi)\n" + "movq %r9, 64(%rdi)\n" + "movq %r8, 72(%rdi)\n" + "movq %rax, 80(%rdi)\n" + "movq %rcx, 88(%rdi)\n" + "movq %rdx, 96(%rdi)\n" + "movq %rsi, 104(%rdi)\n" + "movq %rdi, 112(%rdi)\n" + "movq $0, 120(%rdi)\n" /* orig_rax */ + "movq $0, 128(%rdi)\n" /* rip */ + "movq $0, 136(%rdi)\n" /* cs */ + "pushf\n" + "pop %rax\n" + "movq %rax, 144(%rdi)\n" /* eflags */ + "movq %rsp, 152(%rdi)\n" /* rsp */ + "movq $0, 160(%rdi)\n" /* ss */ + + /* save 2nd argument */ + "pushq %rsi\n" + "call uretprobe_regs_trigger\n" + + /* save return value and load 2nd argument pointer to rax */ + "pushq %rax\n" + "movq 8(%rsp), %rax\n" + + "movq %r15, 0(%rax)\n" + "movq %r14, 8(%rax)\n" + "movq %r13, 16(%rax)\n" + "movq %r12, 24(%rax)\n" + "movq %rbp, 32(%rax)\n" + "movq %rbx, 40(%rax)\n" + "movq %r11, 48(%rax)\n" + "movq %r10, 56(%rax)\n" + "movq %r9, 64(%rax)\n" + "movq %r8, 72(%rax)\n" + "movq %rcx, 88(%rax)\n" + "movq %rdx, 96(%rax)\n" + "movq %rsi, 104(%rax)\n" + "movq %rdi, 112(%rax)\n" + "movq $0, 120(%rax)\n" /* orig_rax */ + "movq $0, 128(%rax)\n" /* rip */ + "movq $0, 136(%rax)\n" /* cs */ + + /* restore return value and 2nd argument */ + "pop %rax\n" + "pop %rsi\n" + + "movq %rax, 80(%rsi)\n" + + "pushf\n" + "pop %rax\n" + + "movq %rax, 144(%rsi)\n" /* eflags */ + "movq %rsp, 152(%rsi)\n" /* rsp */ + "movq $0, 160(%rsi)\n" /* ss */ + "ret\n" +); +} + +static void test_uretprobe_regs_equal(void) +{ + struct uprobe_syscall *skel = NULL; + struct pt_regs before = {}, after = {}; + unsigned long *pb = (unsigned long *) &before; + unsigned long *pa = (unsigned long *) &after; + unsigned long *pp; + unsigned int i, cnt; + int err; + + skel = uprobe_syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load")) + goto cleanup; + + err = uprobe_syscall__attach(skel); + if (!ASSERT_OK(err, "uprobe_syscall__attach")) + goto cleanup; + + uretprobe_regs(&before, &after); + + pp = (unsigned long *) &skel->bss->regs; + cnt = sizeof(before)/sizeof(*pb); + + for (i = 0; i < cnt; i++) { + unsigned int offset = i * sizeof(unsigned long); + + /* + * Check register before and after uretprobe_regs_trigger call + * that triggers the uretprobe. + */ + switch (offset) { + case offsetof(struct pt_regs, rax): + ASSERT_EQ(pa[i], 0xdeadbeef, "return value"); + break; + default: + if (!ASSERT_EQ(pb[i], pa[i], "register before-after value check")) + fprintf(stdout, "failed register offset %u\n", offset); + } + + /* + * Check register seen from bpf program and register after + * uretprobe_regs_trigger call + */ + switch (offset) { + /* + * These values will be different (not set in uretprobe_regs), + * we don't care. + */ + case offsetof(struct pt_regs, orig_rax): + case offsetof(struct pt_regs, rip): + case offsetof(struct pt_regs, cs): + case offsetof(struct pt_regs, rsp): + case offsetof(struct pt_regs, ss): + break; + default: + if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check")) + fprintf(stdout, "failed register offset %u\n", offset); + } + } + +cleanup: + uprobe_syscall__destroy(skel); +} + +#define BPF_TESTMOD_UPROBE_TEST_FILE "/sys/kernel/bpf_testmod_uprobe" + +static int write_bpf_testmod_uprobe(unsigned long offset) +{ + size_t n, ret; + char buf[30]; + int fd; + + n = sprintf(buf, "%lu", offset); + + fd = open(BPF_TESTMOD_UPROBE_TEST_FILE, O_WRONLY); + if (fd < 0) + return -errno; + + ret = write(fd, buf, n); + close(fd); + return ret != n ? (int) ret : 0; +} + +static void test_uretprobe_regs_change(void) +{ + struct pt_regs before = {}, after = {}; + unsigned long *pb = (unsigned long *) &before; + unsigned long *pa = (unsigned long *) &after; + unsigned long cnt = sizeof(before)/sizeof(*pb); + unsigned int i, err, offset; + + offset = get_uprobe_offset(uretprobe_regs_trigger); + + err = write_bpf_testmod_uprobe(offset); + if (!ASSERT_OK(err, "register_uprobe")) + return; + + uretprobe_regs(&before, &after); + + err = write_bpf_testmod_uprobe(0); + if (!ASSERT_OK(err, "unregister_uprobe")) + return; + + for (i = 0; i < cnt; i++) { + unsigned int offset = i * sizeof(unsigned long); + + switch (offset) { + case offsetof(struct pt_regs, rax): + ASSERT_EQ(pa[i], 0x12345678deadbeef, "rax"); + break; + case offsetof(struct pt_regs, rcx): + ASSERT_EQ(pa[i], 0x87654321feebdaed, "rcx"); + break; + case offsetof(struct pt_regs, r11): + ASSERT_EQ(pa[i], (__u64) -1, "r11"); + break; + default: + if (!ASSERT_EQ(pa[i], pb[i], "register before-after value check")) + fprintf(stdout, "failed register offset %u\n", offset); + } + } +} + +#ifndef __NR_uretprobe +#define __NR_uretprobe 467 +#endif + +__naked unsigned long uretprobe_syscall_call_1(void) +{ + /* + * Pretend we are uretprobe trampoline to trigger the return + * probe invocation in order to verify we get SIGILL. + */ + asm volatile ( + "pushq %rax\n" + "pushq %rcx\n" + "pushq %r11\n" + "movq $" __stringify(__NR_uretprobe) ", %rax\n" + "syscall\n" + "popq %r11\n" + "popq %rcx\n" + "retq\n" + ); +} + +__naked unsigned long uretprobe_syscall_call(void) +{ + asm volatile ( + "call uretprobe_syscall_call_1\n" + "retq\n" + ); +} + +static void test_uretprobe_syscall_call(void) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .retprobe = true, + ); + struct uprobe_syscall_executed *skel; + int pid, status, err, go[2], c; + + if (ASSERT_OK(pipe(go), "pipe")) + return; + + skel = uprobe_syscall_executed__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load")) + goto cleanup; + + pid = fork(); + if (!ASSERT_GE(pid, 0, "fork")) + goto cleanup; + + /* child */ + if (pid == 0) { + close(go[1]); + + /* wait for parent's kick */ + err = read(go[0], &c, 1); + if (err != 1) + exit(-1); + + uretprobe_syscall_call(); + _exit(0); + } + + skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid, + "/proc/self/exe", + "uretprobe_syscall_call", &opts); + if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + /* kick the child */ + write(go[1], &c, 1); + err = waitpid(pid, &status, 0); + ASSERT_EQ(err, pid, "waitpid"); + + /* verify the child got killed with SIGILL */ + ASSERT_EQ(WIFSIGNALED(status), 1, "WIFSIGNALED"); + ASSERT_EQ(WTERMSIG(status), SIGILL, "WTERMSIG"); + + /* verify the uretprobe program wasn't called */ + ASSERT_EQ(skel->bss->executed, 0, "executed"); + +cleanup: + uprobe_syscall_executed__destroy(skel); + close(go[1]); + close(go[0]); +} + +/* + * Borrowed from tools/testing/selftests/x86/test_shadow_stack.c. + * + * For use in inline enablement of shadow stack. + * + * The program can't return from the point where shadow stack gets enabled + * because there will be no address on the shadow stack. So it can't use + * syscall() for enablement, since it is a function. + * + * Based on code from nolibc.h. Keep a copy here because this can't pull + * in all of nolibc.h. + */ +#define ARCH_PRCTL(arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = __NR_arch_prctl; \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#ifndef ARCH_SHSTK_ENABLE +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#endif + +static void test_uretprobe_shadow_stack(void) +{ + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + test__skip(); + return; + } + + /* Run all of the uretprobe tests. */ + test_uretprobe_regs_equal(); + test_uretprobe_regs_change(); + test_uretprobe_syscall_call(); + + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); +} +#else +static void test_uretprobe_regs_equal(void) +{ + test__skip(); +} + +static void test_uretprobe_regs_change(void) +{ + test__skip(); +} + +static void test_uretprobe_syscall_call(void) +{ + test__skip(); +} + +static void test_uretprobe_shadow_stack(void) +{ + test__skip(); +} +#endif + +void test_uprobe_syscall(void) +{ + if (test__start_subtest("uretprobe_regs_equal")) + test_uretprobe_regs_equal(); + if (test__start_subtest("uretprobe_regs_change")) + test_uretprobe_regs_change(); + if (test__start_subtest("uretprobe_syscall_call")) + test_uretprobe_syscall_call(); + if (test__start_subtest("uretprobe_shadow_stack")) + test_uretprobe_shadow_stack(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c b/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c new file mode 100644 index 000000000000..6deb8d560ddd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "uretprobe_stack.skel.h" +#include "../sdt.h" + +/* We set up target_1() -> target_2() -> target_3() -> target_4() -> USDT() + * call chain, each being traced by our BPF program. On entry or return from + * each target_*() we are capturing user stack trace and recording it in + * global variable, so that user space part of the test can validate it. + * + * Note, we put each target function into a custom section to get those + * __start_XXX/__stop_XXX symbols, generated by linker for us, which allow us + * to know address range of those functions + */ +__attribute__((section("uprobe__target_4"))) +__weak int target_4(void) +{ + STAP_PROBE1(uretprobe_stack, target, 42); + return 42; +} + +extern const void *__start_uprobe__target_4; +extern const void *__stop_uprobe__target_4; + +__attribute__((section("uprobe__target_3"))) +__weak int target_3(void) +{ + return target_4(); +} + +extern const void *__start_uprobe__target_3; +extern const void *__stop_uprobe__target_3; + +__attribute__((section("uprobe__target_2"))) +__weak int target_2(void) +{ + return target_3(); +} + +extern const void *__start_uprobe__target_2; +extern const void *__stop_uprobe__target_2; + +__attribute__((section("uprobe__target_1"))) +__weak int target_1(int depth) +{ + if (depth < 1) + return 1 + target_1(depth + 1); + else + return target_2(); +} + +extern const void *__start_uprobe__target_1; +extern const void *__stop_uprobe__target_1; + +extern const void *__start_uretprobe_stack_sec; +extern const void *__stop_uretprobe_stack_sec; + +struct range { + long start; + long stop; +}; + +static struct range targets[] = { + {}, /* we want target_1 to map to target[1], so need 1-based indexing */ + { (long)&__start_uprobe__target_1, (long)&__stop_uprobe__target_1 }, + { (long)&__start_uprobe__target_2, (long)&__stop_uprobe__target_2 }, + { (long)&__start_uprobe__target_3, (long)&__stop_uprobe__target_3 }, + { (long)&__start_uprobe__target_4, (long)&__stop_uprobe__target_4 }, +}; + +static struct range caller = { + (long)&__start_uretprobe_stack_sec, + (long)&__stop_uretprobe_stack_sec, +}; + +static void validate_stack(__u64 *ips, int stack_len, int cnt, ...) +{ + int i, j; + va_list args; + + if (!ASSERT_GT(stack_len, 0, "stack_len")) + return; + + stack_len /= 8; + + /* check if we have enough entries to satisfy test expectations */ + if (!ASSERT_GE(stack_len, cnt, "stack_len2")) + return; + + if (env.verbosity >= VERBOSE_NORMAL) { + printf("caller: %#lx - %#lx\n", caller.start, caller.stop); + for (i = 1; i < ARRAY_SIZE(targets); i++) + printf("target_%d: %#lx - %#lx\n", i, targets[i].start, targets[i].stop); + for (i = 0; i < stack_len; i++) { + for (j = 1; j < ARRAY_SIZE(targets); j++) { + if (ips[i] >= targets[j].start && ips[i] < targets[j].stop) + break; + } + if (j < ARRAY_SIZE(targets)) { /* found target match */ + printf("ENTRY #%d: %#lx (in target_%d)\n", i, (long)ips[i], j); + } else if (ips[i] >= caller.start && ips[i] < caller.stop) { + printf("ENTRY #%d: %#lx (in caller)\n", i, (long)ips[i]); + } else { + printf("ENTRY #%d: %#lx\n", i, (long)ips[i]); + } + } + } + + va_start(args, cnt); + + for (i = cnt - 1; i >= 0; i--) { + /* most recent entry is the deepest target function */ + const struct range *t = va_arg(args, const struct range *); + + ASSERT_GE(ips[i], t->start, "addr_start"); + ASSERT_LT(ips[i], t->stop, "addr_stop"); + } + + va_end(args); +} + +/* __weak prevents inlining */ +__attribute__((section("uretprobe_stack_sec"))) +__weak void test_uretprobe_stack(void) +{ + LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct uretprobe_stack *skel; + int err; + + skel = uretprobe_stack__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + err = uretprobe_stack__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + /* trigger */ + ASSERT_EQ(target_1(0), 42 + 1, "trigger_return"); + + /* + * Stacks captured on ENTRY uprobes + */ + + /* (uprobe 1) target_1 in stack trace*/ + validate_stack(skel->bss->entry_stack1, skel->bss->entry1_len, + 2, &caller, &targets[1]); + /* (uprobe 1, recursed) */ + validate_stack(skel->bss->entry_stack1_recur, skel->bss->entry1_recur_len, + 3, &caller, &targets[1], &targets[1]); + /* (uprobe 2) caller -> target_1 -> target_1 -> target_2 */ + validate_stack(skel->bss->entry_stack2, skel->bss->entry2_len, + 4, &caller, &targets[1], &targets[1], &targets[2]); + /* (uprobe 3) */ + validate_stack(skel->bss->entry_stack3, skel->bss->entry3_len, + 5, &caller, &targets[1], &targets[1], &targets[2], &targets[3]); + /* (uprobe 4) caller -> target_1 -> target_1 -> target_2 -> target_3 -> target_4 */ + validate_stack(skel->bss->entry_stack4, skel->bss->entry4_len, + 6, &caller, &targets[1], &targets[1], &targets[2], &targets[3], &targets[4]); + + /* (USDT): full caller -> target_1 -> target_1 -> target_2 (uretprobed) + * -> target_3 -> target_4 (uretprobes) chain + */ + validate_stack(skel->bss->usdt_stack, skel->bss->usdt_len, + 6, &caller, &targets[1], &targets[1], &targets[2], &targets[3], &targets[4]); + + /* + * Now stacks captured on the way out in EXIT uprobes + */ + + /* (uretprobe 4) everything up to target_4, but excluding it */ + validate_stack(skel->bss->exit_stack4, skel->bss->exit4_len, + 5, &caller, &targets[1], &targets[1], &targets[2], &targets[3]); + /* we didn't install uretprobes on target_2 and target_3 */ + /* (uretprobe 1, recur) first target_1 call only */ + validate_stack(skel->bss->exit_stack1_recur, skel->bss->exit1_recur_len, + 2, &caller, &targets[1]); + /* (uretprobe 1) just a caller in the stack trace */ + validate_stack(skel->bss->exit_stack1, skel->bss->exit1_len, + 1, &caller); + +cleanup: + uretprobe_stack__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 1c9c4ec1be11..9dc3687bc406 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -53,6 +53,7 @@ #include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" +#include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" #include "verifier_raw_stack.skel.h" @@ -86,6 +87,7 @@ #include "verifier_xadd.skel.h" #include "verifier_xdp.skel.h" #include "verifier_xdp_direct_packet_access.skel.h" +#include "verifier_bits_iter.skel.h" #define MAX_ENTRIES 11 @@ -170,6 +172,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } +void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } @@ -202,6 +205,7 @@ void test_verifier_var_off(void) { RUN(verifier_var_off); } void test_verifier_xadd(void) { RUN(verifier_xadd); } void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } +void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index f09505f8b038..53d6ad8c2257 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -222,7 +222,7 @@ static void test_xdp_adjust_frags_tail_grow(void) prog = bpf_object__next_program(obj, NULL); if (bpf_object__load(obj)) - return; + goto out; prog_fd = bpf_program__fd(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c new file mode 100644 index 000000000000..e1bf141d3401 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include <bpf/btf.h> +#include <linux/if_link.h> +#include <linux/udp.h> +#include <net/if.h> +#include <unistd.h> + +#include "xdp_flowtable.skel.h" + +#define TX_NETNS_NAME "ns0" +#define RX_NETNS_NAME "ns1" + +#define TX_NAME "v0" +#define FORWARD_NAME "v1" +#define RX_NAME "d0" + +#define TX_MAC "00:00:00:00:00:01" +#define FORWARD_MAC "00:00:00:00:00:02" +#define RX_MAC "00:00:00:00:00:03" +#define DST_MAC "00:00:00:00:00:04" + +#define TX_ADDR "10.0.0.1" +#define FORWARD_ADDR "10.0.0.2" +#define RX_ADDR "20.0.0.1" +#define DST_ADDR "20.0.0.2" + +#define PREFIX_LEN "8" +#define N_PACKETS 10 +#define UDP_PORT 12345 +#define UDP_PORT_STR "12345" + +static int send_udp_traffic(void) +{ + struct sockaddr_storage addr; + int i, sock; + + if (make_sockaddr(AF_INET, DST_ADDR, UDP_PORT, &addr, NULL)) + return -EINVAL; + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) + return sock; + + for (i = 0; i < N_PACKETS; i++) { + unsigned char buf[] = { 0xaa, 0xbb, 0xcc }; + int n; + + n = sendto(sock, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM, + (struct sockaddr *)&addr, sizeof(addr)); + if (n != sizeof(buf)) { + close(sock); + return -EINVAL; + } + + usleep(50000); /* 50ms */ + } + close(sock); + + return 0; +} + +void test_xdp_flowtable(void) +{ + struct xdp_flowtable *skel = NULL; + struct nstoken *tok = NULL; + int iifindex, stats_fd; + __u32 value, key = 0; + struct bpf_link *link; + + if (SYS_NOFAIL("nft -v")) { + fprintf(stdout, "Missing required nft tool\n"); + test__skip(); + return; + } + + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); + + tok = open_netns(RX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + SYS(out, "sysctl -qw net.ipv4.conf.all.forwarding=1"); + + SYS(out, "ip link add " TX_NAME " type veth peer " FORWARD_NAME); + SYS(out, "ip link set " TX_NAME " netns " TX_NETNS_NAME); + SYS(out, "ip link set dev " FORWARD_NAME " address " FORWARD_MAC); + SYS(out, + "ip addr add " FORWARD_ADDR "/" PREFIX_LEN " dev " FORWARD_NAME); + SYS(out, "ip link set dev " FORWARD_NAME " up"); + + SYS(out, "ip link add " RX_NAME " type dummy"); + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); + SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); + SYS(out, "ip link set dev " RX_NAME " up"); + + /* configure the flowtable */ + SYS(out, "nft add table ip filter"); + SYS(out, + "nft add flowtable ip filter f { hook ingress priority 0\\; " + "devices = { " FORWARD_NAME ", " RX_NAME " }\\; }"); + SYS(out, + "nft add chain ip filter forward " + "{ type filter hook forward priority 0\\; }"); + SYS(out, + "nft add rule ip filter forward ip protocol udp th dport " + UDP_PORT_STR " flow add @f"); + + /* Avoid ARP calls */ + SYS(out, + "ip -4 neigh add " DST_ADDR " lladdr " DST_MAC " dev " RX_NAME); + + close_netns(tok); + tok = open_netns(TX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); + SYS(out, "ip link set dev " TX_NAME " up"); + SYS(out, "ip route add default via " FORWARD_ADDR); + + close_netns(tok); + tok = open_netns(RX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + iifindex = if_nametoindex(FORWARD_NAME); + if (!ASSERT_NEQ(iifindex, 0, "iifindex")) + goto out; + + skel = xdp_flowtable__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto out; + + link = bpf_program__attach_xdp(skel->progs.xdp_flowtable_do_lookup, + iifindex); + if (!ASSERT_OK_PTR(link, "prog_attach")) + goto out; + + close_netns(tok); + tok = open_netns(TX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + if (!ASSERT_OK(send_udp_traffic(), "send udp")) + goto out; + + close_netns(tok); + tok = open_netns(RX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + stats_fd = bpf_map__fd(skel->maps.stats); + if (!ASSERT_OK(bpf_map_lookup_elem(stats_fd, &key, &value), + "bpf_map_update_elem stats")) + goto out; + + ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed"); +out: + xdp_flowtable__destroy(skel); + if (tok) + close_netns(tok); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index f76b5d67a3ee..c87ee2bf558c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -68,7 +68,8 @@ static int open_xsk(int ifindex, struct xsk *xsk) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM | + XDP_UMEM_TX_METADATA_LEN, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx; diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index 55f10563208d..bb0acd79d28a 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -25,20 +25,13 @@ bool skip_tests = true; __u32 pid = 0; -#undef __arena -#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) -#define __arena __attribute__((address_space(1))) -#else -#define __arena SEC(".addr_space.1") -#endif - -__u64 __arena add64_value = 1; -__u64 __arena add64_result = 0; -__u32 __arena add32_value = 1; -__u32 __arena add32_result = 0; -__u64 __arena add_stack_value_copy = 0; -__u64 __arena add_stack_result = 0; -__u64 __arena add_noreturn_value = 1; +__u64 __arena_global add64_value = 1; +__u64 __arena_global add64_result = 0; +__u32 __arena_global add32_value = 1; +__u32 __arena_global add32_result = 0; +__u64 __arena_global add_stack_value_copy = 0; +__u64 __arena_global add_stack_result = 0; +__u64 __arena_global add_noreturn_value = 1; SEC("raw_tp/sys_enter") int add(const void *ctx) @@ -58,13 +51,13 @@ int add(const void *ctx) return 0; } -__s64 __arena sub64_value = 1; -__s64 __arena sub64_result = 0; -__s32 __arena sub32_value = 1; -__s32 __arena sub32_result = 0; -__s64 __arena sub_stack_value_copy = 0; -__s64 __arena sub_stack_result = 0; -__s64 __arena sub_noreturn_value = 1; +__s64 __arena_global sub64_value = 1; +__s64 __arena_global sub64_result = 0; +__s32 __arena_global sub32_value = 1; +__s32 __arena_global sub32_result = 0; +__s64 __arena_global sub_stack_value_copy = 0; +__s64 __arena_global sub_stack_result = 0; +__s64 __arena_global sub_noreturn_value = 1; SEC("raw_tp/sys_enter") int sub(const void *ctx) @@ -84,8 +77,8 @@ int sub(const void *ctx) return 0; } -__u64 __arena and64_value = (0x110ull << 32); -__u32 __arena and32_value = 0x110; +__u64 __arena_global and64_value = (0x110ull << 32); +__u32 __arena_global and32_value = 0x110; SEC("raw_tp/sys_enter") int and(const void *ctx) @@ -101,8 +94,8 @@ int and(const void *ctx) return 0; } -__u32 __arena or32_value = 0x110; -__u64 __arena or64_value = (0x110ull << 32); +__u32 __arena_global or32_value = 0x110; +__u64 __arena_global or64_value = (0x110ull << 32); SEC("raw_tp/sys_enter") int or(const void *ctx) @@ -117,8 +110,8 @@ int or(const void *ctx) return 0; } -__u64 __arena xor64_value = (0x110ull << 32); -__u32 __arena xor32_value = 0x110; +__u64 __arena_global xor64_value = (0x110ull << 32); +__u32 __arena_global xor32_value = 0x110; SEC("raw_tp/sys_enter") int xor(const void *ctx) @@ -133,12 +126,12 @@ int xor(const void *ctx) return 0; } -__u32 __arena cmpxchg32_value = 1; -__u32 __arena cmpxchg32_result_fail = 0; -__u32 __arena cmpxchg32_result_succeed = 0; -__u64 __arena cmpxchg64_value = 1; -__u64 __arena cmpxchg64_result_fail = 0; -__u64 __arena cmpxchg64_result_succeed = 0; +__u32 __arena_global cmpxchg32_value = 1; +__u32 __arena_global cmpxchg32_result_fail = 0; +__u32 __arena_global cmpxchg32_result_succeed = 0; +__u64 __arena_global cmpxchg64_value = 1; +__u64 __arena_global cmpxchg64_result_fail = 0; +__u64 __arena_global cmpxchg64_result_succeed = 0; SEC("raw_tp/sys_enter") int cmpxchg(const void *ctx) @@ -156,10 +149,10 @@ int cmpxchg(const void *ctx) return 0; } -__u64 __arena xchg64_value = 1; -__u64 __arena xchg64_result = 0; -__u32 __arena xchg32_value = 1; -__u32 __arena xchg32_result = 0; +__u64 __arena_global xchg64_value = 1; +__u64 __arena_global xchg64_result = 0; +__u32 __arena_global xchg32_value = 1; +__u32 __arena_global xchg32_result = 0; SEC("raw_tp/sys_enter") int xchg(const void *ctx) @@ -176,3 +169,79 @@ int xchg(const void *ctx) return 0; } + +__u64 __arena_global uaf_sink; +volatile __u64 __arena_global uaf_recovery_fails; + +SEC("syscall") +int uaf(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#if defined(ENABLE_ATOMICS_TESTS) && !defined(__TARGET_ARCH_arm64) && \ + !defined(__TARGET_ARCH_x86) + __u32 __arena *page32; + __u64 __arena *page64; + void __arena *page; + + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + bpf_arena_free_pages(&arena, page, 1); + uaf_recovery_fails = 24; + + page32 = (__u32 __arena *)page; + uaf_sink += __sync_fetch_and_add(page32, 1); + uaf_recovery_fails -= 1; + __sync_add_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_sub(page32, 1); + uaf_recovery_fails -= 1; + __sync_sub_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_and(page32, 1); + uaf_recovery_fails -= 1; + __sync_and_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_or(page32, 1); + uaf_recovery_fails -= 1; + __sync_or_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_xor(page32, 1); + uaf_recovery_fails -= 1; + __sync_xor_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_val_compare_and_swap(page32, 0, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_lock_test_and_set(page32, 1); + uaf_recovery_fails -= 1; + + page64 = (__u64 __arena *)page; + uaf_sink += __sync_fetch_and_add(page64, 1); + uaf_recovery_fails -= 1; + __sync_add_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_sub(page64, 1); + uaf_recovery_fails -= 1; + __sync_sub_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_and(page64, 1); + uaf_recovery_fails -= 1; + __sync_and_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_or(page64, 1); + uaf_recovery_fails -= 1; + __sync_or_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_xor(page64, 1); + uaf_recovery_fails -= 1; + __sync_xor_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_val_compare_and_swap(page64, 0, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_lock_test_and_set(page64, 1); + uaf_recovery_fails -= 1; +#endif + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c index 1e6ac187a6a0..81eaa94afeb0 100644 --- a/tools/testing/selftests/bpf/progs/arena_htab.c +++ b/tools/testing/selftests/bpf/progs/arena_htab.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -18,25 +19,35 @@ void __arena *htab_for_user; bool skip = false; int zero = 0; +char __arena arr1[100000]; +char arr2[1000]; SEC("syscall") int arena_htab_llvm(void *ctx) { #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM) struct htab __arena *htab; + char __arena *arr = arr1; __u64 i; htab = bpf_alloc(sizeof(*htab)); cast_kern(htab); htab_init(htab); + cast_kern(arr); + /* first run. No old elems in the table */ - for (i = zero; i < 1000; i++) + for (i = zero; i < 100000 && can_loop; i++) { htab_update_elem(htab, i, i); + arr[i] = i; + } - /* should replace all elems with new ones */ - for (i = zero; i < 1000; i++) + /* should replace some elems with new ones */ + for (i = zero; i < 1000 && can_loop; i++) { htab_update_elem(htab, i, i); + /* Access mem to make the verifier use bounded loop logic */ + arr2[i] = i; + } cast_user(htab); htab_for_user = htab; #else diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c index 93bd0600eba0..3a2ddcacbea6 100644 --- a/tools/testing/selftests/bpf/progs/arena_list.c +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 3c9ffe340312..02f552e7fd4d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -65,7 +65,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca) } SEC("struct_ops") -void BPF_PROG(dctcp_init, struct sock *sk) +void BPF_PROG(bpf_dctcp_init, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -77,7 +77,7 @@ void BPF_PROG(dctcp_init, struct sock *sk) (void *)fallback, sizeof(fallback)) == -EBUSY) ebusy_cnt++; - /* Switch back to myself and the recurred dctcp_init() + /* Switch back to myself and the recurred bpf_dctcp_init() * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION), * except the last "cdg" one. */ @@ -112,7 +112,7 @@ void BPF_PROG(dctcp_init, struct sock *sk) } SEC("struct_ops") -__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) +__u32 BPF_PROG(bpf_dctcp_ssthresh, struct sock *sk) { struct bpf_dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -122,7 +122,7 @@ __u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) } SEC("struct_ops") -void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) +void BPF_PROG(bpf_dctcp_update_alpha, struct sock *sk, __u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -161,12 +161,12 @@ static void dctcp_react_to_loss(struct sock *sk) } SEC("struct_ops") -void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) +void BPF_PROG(bpf_dctcp_state, struct sock *sk, __u8 new_state) { if (new_state == TCP_CA_Recovery && new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) dctcp_react_to_loss(sk); - /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + /* We handle RTO in bpf_dctcp_cwnd_event to ensure that we perform only * one loss-adjustment per RTT. */ } @@ -208,7 +208,7 @@ static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, } SEC("struct_ops") -void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) +void BPF_PROG(bpf_dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) { struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -227,7 +227,7 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) } SEC("struct_ops") -__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) +__u32 BPF_PROG(bpf_dctcp_cwnd_undo, struct sock *sk) { const struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -237,28 +237,28 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; SEC("struct_ops") -void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +void BPF_PROG(bpf_dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) { tcp_reno_cong_avoid(sk, ack, acked); } SEC(".struct_ops") struct tcp_congestion_ops dctcp_nouse = { - .init = (void *)dctcp_init, - .set_state = (void *)dctcp_state, + .init = (void *)bpf_dctcp_init, + .set_state = (void *)bpf_dctcp_state, .flags = TCP_CONG_NEEDS_ECN, .name = "bpf_dctcp_nouse", }; SEC(".struct_ops") struct tcp_congestion_ops dctcp = { - .init = (void *)dctcp_init, - .in_ack_event = (void *)dctcp_update_alpha, - .cwnd_event = (void *)dctcp_cwnd_event, - .ssthresh = (void *)dctcp_ssthresh, - .cong_avoid = (void *)dctcp_cong_avoid, - .undo_cwnd = (void *)dctcp_cwnd_undo, - .set_state = (void *)dctcp_state, + .init = (void *)bpf_dctcp_init, + .in_ack_event = (void *)bpf_dctcp_update_alpha, + .cwnd_event = (void *)bpf_dctcp_cwnd_event, + .ssthresh = (void *)bpf_dctcp_ssthresh, + .cong_avoid = (void *)bpf_dctcp_cong_avoid, + .undo_cwnd = (void *)bpf_dctcp_cwnd_undo, + .set_state = (void *)bpf_dctcp_state, .flags = TCP_CONG_NEEDS_ECN, .name = "bpf_dctcp", }; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index c5969ca6f26b..564835ba7d51 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -6,12 +6,6 @@ char _license[] SEC("license") = "GPL"; -struct key_t { - int a; - int b; - int c; -}; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 3); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c index 85fa710fad90..9f0e0705b2bf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -6,12 +6,6 @@ char _license[] SEC("license") = "GPL"; -struct key_t { - int a; - int b; - int c; -}; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 3); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index fb2f5513e29e..81097a3f15eb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -7,9 +7,9 @@ * * The test_loader sequentially loads each program in a skeleton. * Programs could be loaded in privileged and unprivileged modes. - * - __success, __failure, __msg imply privileged mode; - * - __success_unpriv, __failure_unpriv, __msg_unpriv imply - * unprivileged mode. + * - __success, __failure, __msg, __regex imply privileged mode; + * - __success_unpriv, __failure_unpriv, __msg_unpriv, __regex_unpriv + * imply unprivileged mode. * If combination of privileged and unprivileged attributes is present * both modes are used. If none are present privileged mode is implied. * @@ -24,6 +24,9 @@ * Multiple __msg attributes could be specified. * __msg_unpriv Same as __msg but for unprivileged mode. * + * __regex Same as __msg, but using a regular expression. + * __regex_unpriv Same as __msg_unpriv but using a regular expression. + * * __success Expect program load success in privileged mode. * __success_unpriv Expect program load success in unprivileged mode. * @@ -59,10 +62,12 @@ * __auxiliary_unpriv Same, but load program in unprivileged mode. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) +#define __regex(regex) __attribute__((btf_decl_tag("comment:test_expect_regex=" regex))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc))) #define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg))) +#define __regex_unpriv(regex) __attribute__((btf_decl_tag("comment:test_expect_regex_unpriv=" regex))) #define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv"))) #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) @@ -135,4 +140,8 @@ /* make it look to compiler like value is read and written */ #define __sink(expr) asm volatile("" : "+g"(expr)) +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c index ba97165bdb28..a657651eba52 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c @@ -14,9 +14,9 @@ typedef int *ptr_arr_t[6]; typedef int *ptr_multiarr_t[7][8][9][10]; -typedef int * (*fn_ptr_arr_t[11])(); +typedef int * (*fn_ptr_arr_t[11])(void); -typedef int * (*fn_ptr_multiarr_t[12][13])(); +typedef int * (*fn_ptr_multiarr_t[12][13])(void); struct root_struct { arr_t _1; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index ad21ee8c7e23..29d01fff32bd 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -100,7 +100,7 @@ typedef void (*printf_fn_t)(const char *, ...); * `int -> char *` function and returns pointer to a char. Equivalent: * typedef char * (*fn_input_t)(int); * typedef char * (*fn_output_outer_t)(fn_input_t); - * typedef const fn_output_outer_t (* fn_output_inner_t)(); + * typedef const fn_output_outer_t (* fn_output_inner_t)(void); * typedef const fn_output_inner_t fn_ptr_arr2_t[5]; */ /* ----- START-EXPECTED-OUTPUT ----- */ @@ -127,7 +127,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])(void))(char * (*)(int)); struct struct_w_typedefs { int_t a; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 7a1e64c6c065..fd8106831c32 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -12,6 +12,31 @@ char _license[] SEC("license") = "GPL"; int pid, nr_cpus; +struct kptr_nested { + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_pair { + struct bpf_cpumask __kptr * mask_1; + struct bpf_cpumask __kptr * mask_2; +}; + +struct kptr_nested_mid { + int dummy; + struct kptr_nested m; +}; + +struct kptr_nested_deep { + struct kptr_nested_mid ptrs[2]; + struct kptr_nested_pair ptr_pairs[3]; +}; + +private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2]; +private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1]; +private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1]; +private(MASK) static struct kptr_nested global_mask_nested[2]; +private(MASK_DEEP) static struct kptr_nested_deep global_mask_nested_deep; + static bool is_test_task(void) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -461,6 +486,152 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_one_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + if (!is_test_task()) + return 0; + + /* Kptr arrays with one element are special cased, being treated + * just like a single pointer. + */ + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask_array_one[0], local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask_array_one[0]; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_rcu_read_unlock(); + + return 0; +} + +static int _global_mask_array_rcu(struct bpf_cpumask **mask0, + struct bpf_cpumask **mask1) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + /* Check if two kptrs in the array work and independently */ + + local = create_cpumask(); + if (!local) + return 0; + + bpf_rcu_read_lock(); + + local = bpf_kptr_xchg(mask0, local); + if (local) { + err = 1; + goto err_exit; + } + + /* [<mask 0>, NULL] */ + if (!*mask0 || *mask1) { + err = 2; + goto err_exit; + } + + local = create_cpumask(); + if (!local) { + err = 9; + goto err_exit; + } + + local = bpf_kptr_xchg(mask1, local); + if (local) { + err = 10; + goto err_exit; + } + + /* [<mask 0>, <mask 1>] */ + if (!*mask0 || !*mask1 || *mask0 == *mask1) { + err = 11; + goto err_exit; + } + +err_exit: + if (local) + bpf_cpumask_release(local); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_array[0], &global_mask_array[1]); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_l2_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_array_l2[0][0], &global_mask_array_l2[1][0]); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_nested[0].mask, &global_mask_nested[1].mask); +} + +/* Ensure that the field->offset has been correctly advanced from one + * nested struct or array sub-tree to another. In the case of + * kptr_nested_deep, it comprises two sub-trees: ktpr_1 and kptr_2. By + * calling bpf_kptr_xchg() on every single kptr in both nested sub-trees, + * the verifier should reject the program if the field->offset of any kptr + * is incorrect. + * + * For instance, if we have 10 kptrs in a nested struct and a program that + * accesses each kptr individually with bpf_kptr_xchg(), the compiler + * should emit instructions to access 10 different offsets if it works + * correctly. If the field->offset values of any pair of them are + * incorrectly the same, the number of unique offsets in btf_record for + * this nested struct should be less than 10. The verifier should fail to + * discover some of the offsets emitted by the compiler. + * + * Even if the field->offset values of kptrs are not duplicated, the + * verifier should fail to find a btf_field for the instruction accessing a + * kptr if the corresponding field->offset is pointing to a random + * incorrect offset. + */ +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clone_flags) +{ + int r, i; + + r = _global_mask_array_rcu(&global_mask_nested_deep.ptrs[0].m.mask, + &global_mask_nested_deep.ptrs[1].m.mask); + if (r) + return r; + + for (i = 0; i < 3; i++) { + r = _global_mask_array_rcu(&global_mask_nested_deep.ptr_pairs[i].mask_1, + &global_mask_nested_deep.ptr_pairs[i].mask_2); + if (r) + return r; + } + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *local; diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c index e61fe0882293..4ac956b26240 100644 --- a/tools/testing/selftests/bpf/progs/crypto_bench.c +++ b/tools/testing/selftests/bpf/progs/crypto_bench.c @@ -57,7 +57,7 @@ int crypto_encrypt(struct __sk_buff *skb) { struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; v = crypto_ctx_value_lookup(); if (!v) { @@ -73,9 +73,8 @@ int crypto_encrypt(struct __sk_buff *skb) bpf_dynptr_from_skb(skb, 0, &psrc); bpf_dynptr_from_mem(dst, len, 0, &pdst); - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); __sync_add_and_fetch(&hits, 1); return 0; @@ -84,7 +83,7 @@ int crypto_encrypt(struct __sk_buff *skb) SEC("tc") int crypto_decrypt(struct __sk_buff *skb) { - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; @@ -98,9 +97,8 @@ int crypto_decrypt(struct __sk_buff *skb) bpf_dynptr_from_skb(skb, 0, &psrc); bpf_dynptr_from_mem(dst, len, 0, &pdst); - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); __sync_add_and_fetch(&hits, 1); return 0; diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c index 1be0a3fa5efd..645be6cddf36 100644 --- a/tools/testing/selftests/bpf/progs/crypto_sanity.c +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -89,7 +89,7 @@ int decrypt_sanity(struct __sk_buff *skb) { struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; int err; err = skb_dynptr_validate(skb, &psrc); @@ -114,12 +114,8 @@ int decrypt_sanity(struct __sk_buff *skb) * production code, a percpu map should be used to store the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - /* iv dynptr has to be initialized with 0 size, but proper memory region - * has to be provided anyway - */ - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); return TC_ACT_SHOT; } @@ -129,7 +125,7 @@ int encrypt_sanity(struct __sk_buff *skb) { struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; int err; status = 0; @@ -156,12 +152,8 @@ int encrypt_sanity(struct __sk_buff *skb) * production code, a percpu map should be used to store the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - /* iv dynptr has to be initialized with 0 size, but proper memory region - * has to be provided anyway - */ - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); return TC_ACT_SHOT; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 66a60bfb5867..e35bc1eac52a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -964,7 +964,7 @@ int dynptr_invalidate_slice_reinit(void *ctx) * mem_or_null pointers. */ SEC("?raw_tp") -__failure __msg("R1 type=scalar expected=percpu_ptr_") +__failure __regex("R[0-9]+ type=scalar expected=percpu_ptr_") int dynptr_invalidate_slice_or_null(void *ctx) { struct bpf_dynptr ptr; @@ -982,7 +982,7 @@ int dynptr_invalidate_slice_or_null(void *ctx) /* Destruction of dynptr should also any slices obtained from it */ SEC("?raw_tp") -__failure __msg("R7 invalid mem access 'scalar'") +__failure __regex("R[0-9]+ invalid mem access 'scalar'") int dynptr_invalidate_slice_failure(void *ctx) { struct bpf_dynptr ptr1; @@ -1069,7 +1069,7 @@ int dynptr_read_into_slot(void *ctx) /* bpf_dynptr_slice()s are read-only and cannot be written to */ SEC("?tc") -__failure __msg("R0 cannot write into rdonly_mem") +__failure __regex("R[0-9]+ cannot write into rdonly_mem") int skb_invalid_slice_write(struct __sk_buff *skb) { struct bpf_dynptr ptr; @@ -1686,3 +1686,27 @@ int test_dynptr_skb_small_buff(struct __sk_buff *skb) return !!data; } + +__noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr) +{ + long ret = 0; + /* Avoid leaving this global function empty to avoid having the compiler + * optimize away the call to this global function. + */ + __sink(ret); + return ret; +} + +SEC("?raw_tp") +__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +int test_dynptr_reg_type(void *ctx) +{ + struct task_struct *current = NULL; + /* R1 should be holding a PTR_TO_BTF_ID, so this shouldn't be a + * reg->type that can be passed to a function accepting a + * ARG_PTR_TO_DYNPTR | MEM_RDONLY. process_dynptr_func() should catch + * this. + */ + global_call_bpf_dynptr((const struct bpf_dynptr *)current); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c index 8956eb78a226..2011cacdeb18 100644 --- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -5,13 +5,12 @@ char _license[] SEC("license") = "GPL"; -extern const void bpf_fentry_test1 __ksym; +extern int bpf_fentry_test1(int a) __ksym; +extern int bpf_modify_return_test(int a, int *b) __ksym; + extern const void bpf_fentry_test2 __ksym; extern const void bpf_fentry_test3 __ksym; extern const void bpf_fentry_test4 __ksym; -extern const void bpf_modify_return_test __ksym; -extern const void bpf_fentry_test6 __ksym; -extern const void bpf_fentry_test7 __ksym; extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 1c2b6c1616b0..645b2c9f7867 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,7 +12,7 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h) return ip6h->nexthdr == NEXTHDR_FRAGMENT; } -static int handle_v4(struct sk_buff *skb) +static int handle_v4(struct __sk_buff *skb) { struct bpf_dynptr ptr; u8 iph_buf[20] = {}; @@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb) return NF_ACCEPT; } -static int handle_v6(struct sk_buff *skb) +static int handle_v6(struct __sk_buff *skb) { struct bpf_dynptr ptr; struct ipv6hdr *ip6h; @@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb) SEC("netfilter") int defrag(struct bpf_nf_ctx *ctx) { - struct sk_buff *skb = ctx->skb; + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; - switch (bpf_ntohs(skb->protocol)) { + switch (bpf_ntohs(ctx->skb->protocol)) { case ETH_P_IP: return handle_v4(skb); case ETH_P_IPV6: diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index fe65e0952a1e..16bdc3e25591 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -7,8 +7,6 @@ #include "bpf_misc.h" #include "bpf_compiler.h" -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) - static volatile int zero = 0; int my_pid; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index cf68d1e48a0f..f502f755f567 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -177,4 +177,41 @@ int kfunc_call_test_static_unused_arg(struct __sk_buff *skb) return actual != expected ? -1 : 0; } +struct ctx_val { + struct bpf_testmod_ctx __kptr *ctx; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct ctx_val); +} ctx_map SEC(".maps"); + +SEC("tc") +int kfunc_call_ctx(struct __sk_buff *skb) +{ + struct bpf_testmod_ctx *ctx; + int err = 0; + + ctx = bpf_testmod_ctx_create(&err); + if (!ctx && !err) + err = -1; + if (ctx) { + int key = 0; + struct ctx_val *ctx_val = bpf_map_lookup_elem(&ctx_map, &key); + + /* Transfer ctx to map to be freed via implicit dtor call + * on cleanup. + */ + if (ctx_val) + ctx = bpf_kptr_xchg(&ctx_val->ctx, ctx); + if (ctx) { + bpf_testmod_ctx_release(ctx); + err = -1; + } + } + return err; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c index bbba9eb46551..bd8b7fb7061e 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c @@ -4,8 +4,7 @@ #include <bpf/bpf_tracing.h> #include <stdbool.h> #include "bpf_kfuncs.h" - -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c index d49070803e22..0835b5edf685 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c @@ -25,7 +25,7 @@ int BPF_PROG(trigger) static int check_cookie(__u64 val, __u64 *result) { - long *cookie; + __u64 *cookie; if (bpf_get_current_pid_tgid() >> 32 != pid) return 1; diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 26205ca80679..421f40835acd 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -4,13 +4,26 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_experimental.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" #include "linked_list.h" +struct head_nested_inner { + struct bpf_spin_lock lock; + struct bpf_list_head head __contains(foo, node2); +}; + +struct head_nested { + int dummy; + struct head_nested_inner inner; +}; + +private(C) struct bpf_spin_lock glock_c; +private(C) struct bpf_list_head ghead_array[2] __contains(foo, node2); +private(C) struct bpf_list_head ghead_array_one[1] __contains(foo, node2); + +private(D) struct head_nested ghead_nested; + static __always_inline int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) { @@ -310,6 +323,32 @@ int global_list_push_pop(void *ctx) } SEC("tc") +int global_list_push_pop_nested(void *ctx) +{ + return test_list_push_pop(&ghead_nested.inner.lock, &ghead_nested.inner.head); +} + +SEC("tc") +int global_list_array_push_pop(void *ctx) +{ + int r; + + r = test_list_push_pop(&glock_c, &ghead_array[0]); + if (r) + return r; + + r = test_list_push_pop(&glock_c, &ghead_array[1]); + if (r) + return r; + + /* Arrays with only one element is a special case, being treated + * just like a bpf_list_head variable by the verifier, not an + * array. + */ + return test_list_push_pop(&glock_c, &ghead_array_one[0]); +} + +SEC("tc") int map_list_push_pop_multiple(void *ctx) { struct map_value *v; diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c index 10b2325c1720..63245785eb69 100644 --- a/tools/testing/selftests/bpf/progs/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c @@ -7,7 +7,7 @@ __u32 target_id; -__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym; +__s64 bpf_map_sum_elem_count(const struct bpf_map *map) __ksym; SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h index 83d33931136e..1784b496be2e 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_common.h +++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h @@ -7,6 +7,6 @@ #include <stdbool.h> bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym; -bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +__u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; #endif /* _NESTED_TRUST_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index ea39497f11ed..3568ec450100 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -31,14 +31,6 @@ int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_ return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("R1 must have zero offset when passed to release func or trusted arg to kfunc") -int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags) -{ - bpf_cpumask_first_zero(&task->cpus_mask); - return 0; -} - /* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */ SEC("tp_btf/tcp_probe") __failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c index 833840bffd3b..2b66953ca82e 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_success.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c @@ -32,3 +32,11 @@ int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb) bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0); return 0; } + +SEC("tp_btf/task_newtask") +__success +int BPF_PROG(test_nested_offset, struct task_struct *task, u64 clone_flags) +{ + bpf_cpumask_first_zero(&task->cpus_mask); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index c0062645fc68..9e067dcbf607 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -5,6 +5,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> +#include "bpf_misc.h" #include <errno.h> @@ -23,10 +24,6 @@ bool skip = false; #define BADPTR 0 #endif -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 6957d9f2805e..8bd1ebd7d6af 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -9,6 +9,7 @@ #include "err.h" #include "bpf_experimental.h" #include "bpf_compiler.h" +#include "bpf_misc.h" #ifndef NULL #define NULL 0 @@ -133,10 +134,6 @@ struct { __uint(max_entries, 16); } disallowed_exec_inodes SEC(".maps"); -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0])) -#endif - static INLINE bool IS_ERR(const void* ptr) { return IS_ERR_VALUE((unsigned long)ptr); diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index b09f4fffe57c..a3620c15c136 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -13,6 +13,15 @@ struct node_data { struct bpf_rb_node node; }; +struct root_nested_inner { + struct bpf_spin_lock glock; + struct bpf_rb_root root __contains(node_data, node); +}; + +struct root_nested { + struct root_nested_inner inner; +}; + long less_callback_ran = -1; long removed_key = -1; long first_data[2] = {-1, -1}; @@ -20,6 +29,9 @@ long first_data[2] = {-1, -1}; #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_data, node); +private(A) struct bpf_rb_root groot_array[2] __contains(node_data, node); +private(A) struct bpf_rb_root groot_array_one[1] __contains(node_data, node); +private(B) struct root_nested groot_nested; static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) { @@ -72,6 +84,12 @@ long rbtree_add_nodes(void *ctx) } SEC("tc") +long rbtree_add_nodes_nested(void *ctx) +{ + return __add_three(&groot_nested.inner.root, &groot_nested.inner.glock); +} + +SEC("tc") long rbtree_add_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; @@ -110,6 +128,65 @@ err_out: } SEC("tc") +long rbtree_add_and_remove_array(void *ctx) +{ + struct bpf_rb_node *res1 = NULL, *res2 = NULL, *res3 = NULL; + struct node_data *nodes[3][2] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}}; + struct node_data *n; + long k1 = -1, k2 = -1, k3 = -1; + int i, j; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + nodes[i][j] = bpf_obj_new(typeof(*nodes[i][j])); + if (!nodes[i][j]) + goto err_out; + nodes[i][j]->key = i * 2 + j; + } + } + + bpf_spin_lock(&glock); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + bpf_rbtree_add(&groot_array[i], &nodes[i][j]->node, less); + for (j = 0; j < 2; j++) + bpf_rbtree_add(&groot_array_one[0], &nodes[2][j]->node, less); + res1 = bpf_rbtree_remove(&groot_array[0], &nodes[0][0]->node); + res2 = bpf_rbtree_remove(&groot_array[1], &nodes[1][0]->node); + res3 = bpf_rbtree_remove(&groot_array_one[0], &nodes[2][0]->node); + bpf_spin_unlock(&glock); + + if (res1) { + n = container_of(res1, struct node_data, node); + k1 = n->key; + bpf_obj_drop(n); + } + if (res2) { + n = container_of(res2, struct node_data, node); + k2 = n->key; + bpf_obj_drop(n); + } + if (res3) { + n = container_of(res3, struct node_data, node); + k3 = n->key; + bpf_obj_drop(n); + } + if (k1 != 0 || k2 != 2 || k3 != 4) + return 2; + + return 0; + +err_out: + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + if (nodes[i][j]) + bpf_obj_drop(nodes[i][j]); + } + } + return 1; +} + +SEC("tc") long rbtree_first_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index 3fecf1c6dfe5..b722a1e1ddef 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=3 alloc_insn=10") +__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+") long rbtree_api_remove_no_drop(void *ctx) { struct bpf_rb_node *res; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1553b9c16aa7..f8d4b7cfcd68 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -32,7 +32,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) } SEC("?tc") -__failure __msg("Unreleased reference id=4 alloc_insn=21") +__failure __regex("Unreleased reference id=4 alloc_insn=[0-9]+") long rbtree_refcounted_node_ref_escapes(void *ctx) { struct node_acquire *n, *m; @@ -73,7 +73,7 @@ long refcount_acquire_maybe_null(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=3 alloc_insn=9") +__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) { struct node_acquire *n, *m; diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 7a438600ae98..60518aed1ffc 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -6,10 +6,7 @@ #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" extern unsigned long CONFIG_HZ __kconfig; diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c index db4abd2682fc..3bb4451524a1 100644 --- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -33,6 +33,8 @@ int main_prog(struct __sk_buff *skb) struct iphdr *ip = NULL; struct tcphdr *tcp; __u8 proto = 0; + int urg_ptr; + u32 offset; if (!(ip = get_iphdr(skb))) goto out; @@ -48,7 +50,14 @@ int main_prog(struct __sk_buff *skb) if (!tcp) goto out; - return tcp->urg_ptr; + urg_ptr = tcp->urg_ptr; + + /* Checksum validation part */ + proto++; + offset = sizeof(struct ethhdr) + offsetof(struct iphdr, protocol); + bpf_skb_store_bytes(skb, offset, &proto, sizeof(proto), BPF_F_RECOMPUTE_CSUM); + + return urg_ptr; out: return -1; } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c new file mode 100644 index 000000000000..56b787a89876 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_do_detach; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index 3494ca30fa7f..4a4e0b8d9b72 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -7,10 +7,6 @@ #include "bpf_experimental.h" #include "bpf_misc.h" -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - struct generic_map_value { void *data; }; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 77ad8adf68da..f7b330ddd007 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> @@ -9,10 +10,14 @@ #define EINVAL 22 #define ENOENT 2 +#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL) +#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY) + extern unsigned long CONFIG_HZ __kconfig; int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; +int test_einval_reserved_new = 0; int test_einval_netns_id = 0; int test_einval_len_opts = 0; int test_eproto_l4proto = 0; @@ -22,6 +27,11 @@ int test_eafnosupport = 0; int test_alloc_entry = -EINVAL; int test_insert_entry = -EAFNOSUPPORT; int test_succ_lookup = -ENOENT; +int test_ct_zone_id_alloc_entry = -EINVAL; +int test_ct_zone_id_insert_entry = -EAFNOSUPPORT; +int test_ct_zone_id_succ_lookup = -ENOENT; +int test_ct_zone_dir_enoent_lookup = 0; +int test_ct_zone_id_enoent_lookup = 0; u32 test_delta_timeout = 0; u32 test_status = 0; u32 test_insert_lookup_mark = 0; @@ -45,6 +55,17 @@ struct bpf_ct_opts___local { s32 netns_id; s32 error; u8 l4proto; + u8 dir; + u8 reserved[2]; +}; + +struct bpf_ct_opts___new { + s32 netns_id; + s32 error; + u8 l4proto; + u8 dir; + u16 ct_zone_id; + u8 ct_zone_dir; u8 reserved[3]; } __attribute__((preserve_access_index)); @@ -220,10 +241,97 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, } } +static __always_inline void +nf_ct_opts_new_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___new *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___new *, u32), + void *ctx) +{ + struct bpf_ct_opts___new opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; + struct bpf_sock_tuple bpf_tuple; + struct nf_conn *ct; + + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); + + opts_def.reserved[0] = 1; + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + opts_def.reserved[0] = 0; + if (ct) + bpf_ct_release(ct); + else + test_einval_reserved_new = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + /* use non-default ct zone */ + opts_def.ct_zone_id = 10; + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG; + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + __u16 sport = bpf_get_prandom_u32(); + __u16 dport = bpf_get_prandom_u32(); + union nf_inet_addr saddr = {}; + union nf_inet_addr daddr = {}; + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + + /* snat */ + saddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local); + /* dnat */ + daddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + /* entry should exist in same ct zone we inserted it */ + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + bpf_ct_release(ct_lk); + test_ct_zone_id_succ_lookup = 0; + } + + /* entry should not exist with wrong direction */ + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_REPL; + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG; + if (ct_lk) + bpf_ct_release(ct_lk); + else + test_ct_zone_dir_enoent_lookup = opts_def.error; + + /* entry should not exist in default ct zone */ + opts_def.ct_zone_id = 0; + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) + bpf_ct_release(ct_lk); + else + test_ct_zone_id_enoent_lookup = opts_def.error; + + bpf_ct_release(ct_ins); + test_ct_zone_id_insert_entry = 0; + } + test_ct_zone_id_alloc_entry = 0; + } +} + SEC("xdp") int nf_xdp_ct_test(struct xdp_md *ctx) { nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); + nf_ct_opts_new_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } @@ -231,6 +339,7 @@ SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); + nf_ct_opts_new_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c index 0e4759ab38ff..a586f087ffeb 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index 2dde8e3fe4c9..e68667aec6a6 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) } SEC("?lsm.s/bpf") -__failure __msg("arg#0 expected pointer to stack or dynptr_ptr") +__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val = 0; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c new file mode 100644 index 000000000000..7ac7e1de34d8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +SEC("tc") +int kfunc_dynptr_nullable_test1(struct __sk_buff *skb) +{ + struct bpf_dynptr data; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_kfunc_dynptr_test(&data, NULL); + + return 0; +} + +SEC("tc") +int kfunc_dynptr_nullable_test2(struct __sk_buff *skb) +{ + struct bpf_dynptr data; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_kfunc_dynptr_test(&data, &data); + + return 0; +} + +SEC("tc") +__failure __msg("expected pointer to stack or const struct bpf_dynptr") +int kfunc_dynptr_nullable_test3(struct __sk_buff *skb) +{ + struct bpf_dynptr data; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_kfunc_dynptr_test(NULL, &data); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c new file mode 100644 index 000000000000..350513c0e4c9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* outputs */ +long passed = 0; +long discarded = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_write(void *ctx) +{ + int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32; + void *sample1, *sample2; + + if (cur_pid != pid) + return 0; + + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample1) + return 0; + /* first one can pass */ + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample2) { + bpf_ringbuf_discard(sample1, 0); + __sync_fetch_and_add(&discarded, 1); + return 0; + } + /* second one must not */ + __sync_fetch_and_add(&passed, 1); + foo = sample2 + 4084; + *foo = 256; + bpf_ringbuf_discard(sample1, 0); + bpf_ringbuf_discard(sample2, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 99d2ea9fb658..f48f85f1bd70 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -92,7 +92,7 @@ struct { __uint(value_size, sizeof(int)); } tls_sock_map SEC(".maps"); -SEC("sk_skb1") +SEC("sk_skb/stream_parser") int bpf_prog1(struct __sk_buff *skb) { int *f, two = 2; @@ -104,7 +104,7 @@ int bpf_prog1(struct __sk_buff *skb) return skb->len; } -SEC("sk_skb2") +SEC("sk_skb/stream_verdict") int bpf_prog2(struct __sk_buff *skb) { __u32 lport = skb->local_port; @@ -151,7 +151,7 @@ static inline void bpf_write_pass(struct __sk_buff *skb, int offset) memcpy(c + offset, "PASS", 4); } -SEC("sk_skb3") +SEC("sk_skb/stream_verdict") int bpf_prog3(struct __sk_buff *skb) { int err, *f, ret = SK_PASS; @@ -177,9 +177,6 @@ int bpf_prog3(struct __sk_buff *skb) return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); - if (f && *f) - ret = SK_DROP; err = bpf_skb_adjust_room(skb, 4, 0, 0); if (err) return SK_DROP; @@ -233,7 +230,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops) return 0; } -SEC("sk_msg1") +SEC("sk_msg") int bpf_prog4(struct sk_msg_md *msg) { int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; @@ -263,7 +260,7 @@ int bpf_prog4(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg2") +SEC("sk_msg") int bpf_prog6(struct sk_msg_md *msg) { int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; @@ -308,7 +305,7 @@ int bpf_prog6(struct sk_msg_md *msg) #endif } -SEC("sk_msg3") +SEC("sk_msg") int bpf_prog8(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -329,7 +326,8 @@ int bpf_prog8(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg4") + +SEC("sk_msg") int bpf_prog9(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -347,7 +345,7 @@ int bpf_prog9(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg5") +SEC("sk_msg") int bpf_prog10(struct sk_msg_md *msg) { int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 7f74077d6622..548660e299a5 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -10,10 +10,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_compiler.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" /* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */ #define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */ diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index 68a75436e8af..81249d119a8b 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -10,10 +10,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_compiler.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" /* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */ #define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */ diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index efc3c61f7852..bbdd08764789 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -10,6 +10,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_compiler.h" +#include "bpf_misc.h" /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ #define MAX_ULONG_STR_LEN 0xF @@ -17,10 +18,6 @@ /* Max supported length of sysctl value string (pow2). */ #define MAX_VALUE_STR_LEN 0x40 -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - const char tcp_mem_name[] = "net/ipv4/tcp_mem"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 74ec09f040b7..ca8e8734d901 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -222,17 +222,21 @@ int egress_host(struct __sk_buff *skb) return TC_ACT_OK; if (skb_proto(skb_type) == IPPROTO_TCP) { - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && skb->tstamp) inc_dtimes(EGRESS_ENDHOST); else inc_errs(EGRESS_ENDHOST); - } else { - if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && + } else if (skb_proto(skb_type) == IPPROTO_UDP) { + if (skb->tstamp_type == BPF_SKB_CLOCK_TAI && skb->tstamp) inc_dtimes(EGRESS_ENDHOST); else inc_errs(EGRESS_ENDHOST); + } else { + if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME && + skb->tstamp) + inc_errs(EGRESS_ENDHOST); } skb->tstamp = EGRESS_ENDHOST_MAGIC; @@ -252,7 +256,7 @@ int ingress_host(struct __sk_buff *skb) if (!skb_type) return TC_ACT_OK; - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && skb->tstamp == EGRESS_FWDNS_MAGIC) inc_dtimes(INGRESS_ENDHOST); else @@ -315,7 +319,6 @@ int egress_fwdns_prio100(struct __sk_buff *skb) SEC("tc") int ingress_fwdns_prio101(struct __sk_buff *skb) { - __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; int skb_type; skb_type = skb_get_type(skb); @@ -323,29 +326,24 @@ int ingress_fwdns_prio101(struct __sk_buff *skb) /* Should have handled in prio100 */ return TC_ACT_SHOT; - if (skb_proto(skb_type) == IPPROTO_UDP) - expected_dtime = 0; - if (skb->tstamp_type) { if (fwdns_clear_dtime() || - skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || - skb->tstamp != expected_dtime) + (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC && + skb->tstamp_type != BPF_SKB_CLOCK_TAI) || + skb->tstamp != EGRESS_ENDHOST_MAGIC) inc_errs(INGRESS_FWDNS_P101); else inc_dtimes(INGRESS_FWDNS_P101); } else { - if (!fwdns_clear_dtime() && expected_dtime) + if (!fwdns_clear_dtime()) inc_errs(INGRESS_FWDNS_P101); } - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { skb->tstamp = INGRESS_FWDNS_MAGIC; } else { if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_DELIVERY_MONO)) - inc_errs(SET_DTIME); - if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_UNSPEC)) + BPF_SKB_CLOCK_MONOTONIC)) inc_errs(SET_DTIME); } @@ -370,7 +368,7 @@ int egress_fwdns_prio101(struct __sk_buff *skb) if (skb->tstamp_type) { if (fwdns_clear_dtime() || - skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || + skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC || skb->tstamp != INGRESS_FWDNS_MAGIC) inc_errs(EGRESS_FWDNS_P101); else @@ -380,14 +378,11 @@ int egress_fwdns_prio101(struct __sk_buff *skb) inc_errs(EGRESS_FWDNS_P101); } - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { skb->tstamp = EGRESS_FWDNS_MAGIC; } else { if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_DELIVERY_MONO)) - inc_errs(SET_DTIME); - if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_UNSPEC)) + BPF_SKB_CLOCK_MONOTONIC)) inc_errs(SET_DTIME); } diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index c8e4553648bf..44ee0d037f95 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -9,6 +9,7 @@ #include "bpf_kfuncs.h" #include "test_siphash.h" #include "test_tcp_custom_syncookie.h" +#include "bpf_misc.h" #define MAX_PACKET_OFF 0xffff diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h index 29a6a53cf229..f8b1b7e68d2e 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -7,8 +7,6 @@ #define __packed __attribute__((__packed__)) #define __force -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - #define swap(a, b) \ do { \ typeof(a) __tmp = (a); \ diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c new file mode 100644 index 000000000000..3e520133281e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_lockup.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer2_map SEC(".maps"); + +int timer1_err; +int timer2_err; + +static int timer_cb1(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) + timer2_err = bpf_timer_cancel(timer); + + return 0; +} + +static int timer_cb2(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) + timer1_err = bpf_timer_cancel(timer); + + return 0; +} + +SEC("tc") +int timer1_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) { + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb1); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} + +SEC("tc") +int timer2_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) { + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb2); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c index 515daef3c84b..c435a3a8328a 100644 --- a/tools/testing/selftests/bpf/progs/tracing_struct.c +++ b/tools/testing/selftests/bpf/progs/tracing_struct.c @@ -18,11 +18,6 @@ struct bpf_testmod_struct_arg_3 { int b[]; }; -struct bpf_testmod_struct_arg_4 { - u64 a; - int b; -}; - long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs; __u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3; long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret; @@ -30,9 +25,6 @@ long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret; long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret; long t5_ret; int t6; -long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret; -long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret; - SEC("fentry/bpf_testmod_test_struct_arg_1") int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c) @@ -138,50 +130,4 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a) return 0; } -SEC("fentry/bpf_testmod_test_struct_arg_7") -int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f) -{ - t7_a = a; - t7_b = (long)b; - t7_c = c; - t7_d = d; - t7_e = (long)e; - t7_f_a = f.a; - t7_f_b = f.b; - return 0; -} - -SEC("fexit/bpf_testmod_test_struct_arg_7") -int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f, int, ret) -{ - t7_ret = ret; - return 0; -} - -SEC("fentry/bpf_testmod_test_struct_arg_8") -int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f, int, g) -{ - t8_a = a; - t8_b = (long)b; - t8_c = c; - t8_d = d; - t8_e = (long)e; - t8_f_a = f.a; - t8_f_b = f.b; - t8_g = g; - return 0; -} - -SEC("fexit/bpf_testmod_test_struct_arg_8") -int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f, int, g, - int, ret) -{ - t8_ret = ret; - return 0; -} - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c new file mode 100644 index 000000000000..4742012ace06 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct bpf_testmod_struct_arg_4 { + u64 a; + int b; +}; + +struct bpf_testmod_struct_arg_5 { + char a; + short b; + int c; + long d; +}; + +long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret; +long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret; +long t9_a, t9_b, t9_c, t9_d, t9_e, t9_f, t9_g, t9_h_a, t9_h_b, t9_h_c, t9_h_d, t9_i, t9_ret; + +SEC("fentry/bpf_testmod_test_struct_arg_7") +int BPF_PROG2(test_struct_many_args_1, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f) +{ + t7_a = a; + t7_b = (long)b; + t7_c = c; + t7_d = d; + t7_e = (long)e; + t7_f_a = f.a; + t7_f_b = f.b; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_7") +int BPF_PROG2(test_struct_many_args_2, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, ret) +{ + t7_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_8") +int BPF_PROG2(test_struct_many_args_3, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, g) +{ + t8_a = a; + t8_b = (long)b; + t8_c = c; + t8_d = d; + t8_e = (long)e; + t8_f_a = f.a; + t8_f_b = f.b; + t8_g = g; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_8") +int BPF_PROG2(test_struct_many_args_4, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, g, + int, ret) +{ + t8_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_9") +int BPF_PROG2(test_struct_many_args_5, __u64, a, void *, b, short, c, int, d, void *, e, + char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i) +{ + t9_a = a; + t9_b = (long)b; + t9_c = c; + t9_d = d; + t9_e = (long)e; + t9_f = f; + t9_g = g; + t9_h_a = h.a; + t9_h_b = h.b; + t9_h_c = h.c; + t9_h_d = h.d; + t9_i = i; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_9") +int BPF_PROG2(test_struct_many_args_6, __u64, a, void *, b, short, c, int, d, void *, e, + char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i, int, ret) +{ + t9_ret = ret; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c new file mode 100644 index 000000000000..8a4fa6c7ef59 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <string.h> + +struct pt_regs regs; + +char _license[] SEC("license") = "GPL"; + +SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger") +int uretprobe(struct pt_regs *ctx) +{ + __builtin_memcpy(®s, ctx, sizeof(regs)); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c new file mode 100644 index 000000000000..0d7f1a7db2e2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <string.h> + +struct pt_regs regs; + +char _license[] SEC("license") = "GPL"; + +int executed = 0; + +SEC("uretprobe.multi") +int test(struct pt_regs *regs) +{ + executed = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c new file mode 100644 index 000000000000..9fdcf396b8f4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> + +char _license[] SEC("license") = "GPL"; + +__u64 entry_stack1[32], exit_stack1[32]; +__u64 entry_stack1_recur[32], exit_stack1_recur[32]; +__u64 entry_stack2[32]; +__u64 entry_stack3[32]; +__u64 entry_stack4[32], exit_stack4[32]; +__u64 usdt_stack[32]; + +int entry1_len, exit1_len; +int entry1_recur_len, exit1_recur_len; +int entry2_len, exit2_len; +int entry3_len, exit3_len; +int entry4_len, exit4_len; +int usdt_len; + +#define SZ sizeof(usdt_stack) + +SEC("uprobe//proc/self/exe:target_1") +int BPF_UPROBE(uprobe_1) +{ + /* target_1 is recursive wit depth of 2, so we capture two separate + * stack traces, depending on which occurence it is + */ + static bool recur = false; + + if (!recur) + entry1_len = bpf_get_stack(ctx, &entry_stack1, SZ, BPF_F_USER_STACK); + else + entry1_recur_len = bpf_get_stack(ctx, &entry_stack1_recur, SZ, BPF_F_USER_STACK); + + recur = true; + return 0; +} + +SEC("uretprobe//proc/self/exe:target_1") +int BPF_URETPROBE(uretprobe_1) +{ + /* see above, target_1 is recursive */ + static bool recur = false; + + /* NOTE: order of returns is reversed to order of entries */ + if (!recur) + exit1_recur_len = bpf_get_stack(ctx, &exit_stack1_recur, SZ, BPF_F_USER_STACK); + else + exit1_len = bpf_get_stack(ctx, &exit_stack1, SZ, BPF_F_USER_STACK); + + recur = true; + return 0; +} + +SEC("uprobe//proc/self/exe:target_2") +int BPF_UPROBE(uprobe_2) +{ + entry2_len = bpf_get_stack(ctx, &entry_stack2, SZ, BPF_F_USER_STACK); + return 0; +} + +/* no uretprobe for target_2 */ + +SEC("uprobe//proc/self/exe:target_3") +int BPF_UPROBE(uprobe_3) +{ + entry3_len = bpf_get_stack(ctx, &entry_stack3, SZ, BPF_F_USER_STACK); + return 0; +} + +/* no uretprobe for target_3 */ + +SEC("uprobe//proc/self/exe:target_4") +int BPF_UPROBE(uprobe_4) +{ + entry4_len = bpf_get_stack(ctx, &entry_stack4, SZ, BPF_F_USER_STACK); + return 0; +} + +SEC("uretprobe//proc/self/exe:target_4") +int BPF_URETPROBE(uretprobe_4) +{ + exit4_len = bpf_get_stack(ctx, &exit_stack4, SZ, BPF_F_USER_STACK); + return 0; +} + +SEC("usdt//proc/self/exe:uretprobe_stack:target") +int BPF_USDT(usdt_probe) +{ + usdt_len = bpf_get_stack(ctx, &usdt_stack, SZ, BPF_F_USER_STACK); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 11ab25c42c36..54de0389f878 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -221,3 +221,25 @@ int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx) bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0); return 0; } + +__noinline long global_call_bpf_dynptr_data(struct bpf_dynptr *dynptr) +{ + bpf_dynptr_data(dynptr, 0xA, 0xA); + return 0; +} + +static long callback_adjust_bpf_dynptr_reg_off(struct bpf_dynptr *dynptr, + void *ctx) +{ + global_call_bpf_dynptr_data(dynptr += 1024); + return 0; +} + +SEC("?raw_tp") +__failure __msg("dereference of modified dynptr_ptr ptr R1 off=16384 disallowed") +int user_ringbuf_callback_const_ptr_to_dynptr_reg_off(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, + callback_adjust_bpf_dynptr_reg_off, NULL, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 93144ae6df74..67509c5d3982 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index ef66ea460264..6065f862d964 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c new file mode 100644 index 000000000000..716113c2bce2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "bpf_misc.h" +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, + u32 nr_bits) __ksym __weak; +int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak; +void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak; + +SEC("iter.s/cgroup") +__description("bits iter without destroy") +__failure __msg("Unreleased reference") +int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits it; + u64 data = 1; + + bpf_iter_bits_new(&it, &data, 1); + bpf_iter_bits_next(&it); + return 0; +} + +SEC("iter/cgroup") +__description("uninitialized iter in ->next()") +__failure __msg("expected an initialized iter_bits as arg #1") +int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits *it = NULL; + + bpf_iter_bits_next(it); + return 0; +} + +SEC("iter/cgroup") +__description("uninitialized iter in ->destroy()") +__failure __msg("expected an initialized iter_bits as arg #1") +int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits it = {}; + + bpf_iter_bits_destroy(&it); + return 0; +} + +SEC("syscall") +__description("null pointer") +__success __retval(0) +int null_pointer(void) +{ + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, NULL, 1) + nr++; + return nr; +} + +SEC("syscall") +__description("bits copy") +__success __retval(10) +int bits_copy(void) +{ + u64 data = 0xf7310UL; /* 4 + 3 + 2 + 1 + 0*/ + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data, 1) + nr++; + return nr; +} + +SEC("syscall") +__description("bits memalloc") +__success __retval(64) +int bits_memalloc(void) +{ + u64 data[2]; + int nr = 0; + int *bit; + + __builtin_memset(&data, 0xf0, sizeof(data)); /* 4 * 16 */ + bpf_for_each(bits, bit, &data[0], sizeof(data) / sizeof(u64)) + nr++; + return nr; +} + +SEC("syscall") +__description("bit index") +__success __retval(8) +int bit_index(void) +{ + u64 data = 0x100; + int bit_idx = 0; + int *bit; + + bpf_for_each(bits, bit, &data, 1) { + if (*bit == 0) + continue; + bit_idx = *bit; + } + return bit_idx; +} + +SEC("syscall") +__description("bits nomem") +__success __retval(0) +int bits_nomem(void) +{ + u64 data[4]; + int nr = 0; + int *bit; + + __builtin_memset(&data, 0xff, sizeof(data)); + bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */ + nr++; + return nr; +} + +SEC("syscall") +__description("fewer words") +__success __retval(1) +int fewer_words(void) +{ + u64 data[2] = {0x1, 0xff}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 1) + nr++; + return nr; +} + +SEC("syscall") +__description("zero words") +__success __retval(0) +int zero_words(void) +{ + u64 data[2] = {0x1, 0xff}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 0) + nr++; + return nr; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index bd676d7e615f..e54bb5385bc1 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void) ); } +int tmp_var; +SEC("socket") +__failure __msg("infinite loop detected at insn 2") +__naked void jgt_imm64_and_may_goto(void) +{ + asm volatile (" \ + r0 = %[tmp_var] ll; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -3; /* off -3 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm_addr(tmp_var) + : __clobber_all); +} + +SEC("socket") +__failure __msg("infinite loop detected at insn 1") +__naked void may_goto_self(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -1; /* off -1 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__success __retval(0) +__naked void may_goto_neg_off(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ + goto l0_%=; \ + goto l1_%=; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -2; /* off -2 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ +l1_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + SEC("tc") __failure __flag(BPF_F_TEST_STATE_FREQ) @@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb) return 0; } +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto2(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void jlt_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: call %[bpf_jiffies64]; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + if r0 < 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm(bpf_jiffies64) + : __clobber_all); +} + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +SEC("socket") +__success __retval(0) +__naked void gotol_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + gotol l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} +#endif + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto_subprog(void) +{ + asm volatile (" \ + call subprog_with_may_goto; \ + exit; \ +" ::: __clobber_all); +} + +static __naked __noinline __used +void subprog_with_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + #define ARR_SZ 1000000 int zero; char arr[ARR_SZ]; @@ -405,4 +551,240 @@ int cond_break5(const void *ctx) return cnt1 > 1 && cnt2 > 1 ? 1 : 0; } +#define ARR2_SZ 1000 +SEC(".data.arr2") +char arr2[ARR2_SZ]; + +SEC("socket") +__success __flag(BPF_F_TEST_STATE_FREQ) +int loop_inside_iter(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + __u64 i = 0; + + bpf_iter_num_new(&it, 0, ARR2_SZ); + while ((v = bpf_iter_num_next(&it))) { + if (i < ARR2_SZ) + sum += arr2[i++]; + } + bpf_iter_num_destroy(&it); + return sum; +} + +SEC("socket") +__success __flag(BPF_F_TEST_STATE_FREQ) +int loop_inside_iter_signed(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + long i = 0; + + bpf_iter_num_new(&it, 0, ARR2_SZ); + while ((v = bpf_iter_num_next(&it))) { + if (i < ARR2_SZ && i >= 0) + sum += arr2[i++]; + } + bpf_iter_num_destroy(&it); + return sum; +} + +volatile const int limit = ARR2_SZ; + +SEC("socket") +__success __flag(BPF_F_TEST_STATE_FREQ) +int loop_inside_iter_volatile_limit(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + __u64 i = 0; + + bpf_iter_num_new(&it, 0, ARR2_SZ); + while ((v = bpf_iter_num_next(&it))) { + if (i < limit) + sum += arr2[i++]; + } + bpf_iter_num_destroy(&it); + return sum; +} + +#define ARR_LONG_SZ 1000 + +SEC(".data.arr_long") +long arr_long[ARR_LONG_SZ]; + +SEC("socket") +__success +int test1(const void *ctx) +{ + long i; + + for (i = 0; i < ARR_LONG_SZ && can_loop; i++) + arr_long[i] = i; + return 0; +} + +SEC("socket") +__success +int test2(const void *ctx) +{ + __u64 i; + + for (i = zero; i < ARR_LONG_SZ && can_loop; i++) { + barrier_var(i); + arr_long[i] = i; + } + return 0; +} + +SEC(".data.arr_foo") +struct { + int a; + int b; +} arr_foo[ARR_LONG_SZ]; + +SEC("socket") +__success +int test3(const void *ctx) +{ + __u64 i; + + for (i = zero; i < ARR_LONG_SZ && can_loop; i++) { + barrier_var(i); + arr_foo[i].a = i; + arr_foo[i].b = i; + } + return 0; +} + +SEC("socket") +__success +int test4(const void *ctx) +{ + long i; + + for (i = zero + ARR_LONG_SZ - 1; i < ARR_LONG_SZ && i >= 0 && can_loop; i--) { + barrier_var(i); + arr_foo[i].a = i; + arr_foo[i].b = i; + } + return 0; +} + +char buf[10] SEC(".data.buf"); + +SEC("socket") +__description("check add const") +__success +__naked void check_add_const(void) +{ + /* typical LLVM generated loop with may_goto */ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 > 9 goto l1_%=; \ +l0_%=: r1 = %[buf]; \ + r2 = r0; \ + r1 += r2; \ + r3 = *(u8 *)(r1 +0); \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 4; /* off of l1_%=: */ \ + .long 0; /* imm */ \ + r0 = r2; \ + r0 += 1; \ + if r2 < 9 goto l0_%=; \ + exit; \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + +SEC("socket") +__failure +__msg("*(u8 *)(r7 +0) = r0") +__msg("invalid access to map value, value_size=10 off=10 size=1") +__naked void check_add_const_3regs(void) +{ + asm volatile ( + "r6 = %[buf];" + "r7 = %[buf];" + "call %[bpf_ktime_get_ns];" + "r1 = r0;" /* link r0.id == r1.id == r2.id */ + "r2 = r0;" + "r1 += 1;" /* r1 == r0+1 */ + "r2 += 2;" /* r2 == r0+2 */ + "if r0 > 8 goto 1f;" /* r0 range [0, 8] */ + "r6 += r1;" /* r1 range [1, 9] */ + "r7 += r2;" /* r2 range [2, 10] */ + "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */ + "*(u8 *)(r7 +0) = r0;" /* unsafe, out of bounds */ + "1: exit;" + : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + +SEC("socket") +__failure +__msg("*(u8 *)(r8 -1) = r0") +__msg("invalid access to map value, value_size=10 off=10 size=1") +__naked void check_add_const_3regs_2if(void) +{ + asm volatile ( + "r6 = %[buf];" + "r7 = %[buf];" + "r8 = %[buf];" + "call %[bpf_ktime_get_ns];" + "if r0 < 2 goto 1f;" + "r1 = r0;" /* link r0.id == r1.id == r2.id */ + "r2 = r0;" + "r1 += 1;" /* r1 == r0+1 */ + "r2 += 2;" /* r2 == r0+2 */ + "if r2 > 11 goto 1f;" /* r2 range [0, 11] -> r0 range [-2, 9]; r1 range [-1, 10] */ + "if r0 s< 0 goto 1f;" /* r0 range [0, 9] -> r1 range [1, 10]; r2 range [2, 11]; */ + "r6 += r0;" /* r0 range [0, 9] */ + "r7 += r1;" /* r1 range [1, 10] */ + "r8 += r2;" /* r2 range [2, 11] */ + "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */ + "*(u8 *)(r7 -1) = r0;" /* safe */ + "*(u8 *)(r8 -1) = r0;" /* unsafe */ + "1: exit;" + : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + +SEC("socket") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__naked void check_add_const_regsafe_off(void) +{ + asm volatile ( + "r8 = %[buf];" + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r7 = r0;" + "call %[bpf_ktime_get_ns];" + "r1 = r0;" /* same ids for r1 and r0 */ + "if r6 > r7 goto 1f;" /* this jump can't be predicted */ + "r1 += 1;" /* r1.off == +1 */ + "goto 2f;" + "1: r1 += 100;" /* r1.off == +100 */ + "goto +0;" /* verify r1.off in regsafe() after this insn */ + "2: if r0 > 8 goto 3f;" /* r0 range [0,8], r1 range either [1,9] or [100,108]*/ + "r8 += r1;" + "*(u8 *)(r8 +0) = r0;" /* potentially unsafe, buf size is 10 */ + "3: exit;" + : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index cbb9d6714f53..028ec855587b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -224,6 +224,69 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV32SX, S8, var_off u32_max") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0") +__naked void mov64sx_s32_varoff_1(void) +{ + asm volatile (" \ +l0_%=: \ + r3 = *(u8 *)(r10 -387); \ + w7 = (s8)w3; \ + if w7 >= 0x2533823b goto l0_%=; \ + w0 = 0; \ + exit; \ +" : + : + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_2(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + w7 = (s8)w3; \ + if w7 s>= 16 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_3(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + r3 |= 0x80; \ + w7 = (s8)w3; \ + if w7 s>= -5 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index 65bba330e7e5..ab9f9f2620ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -90,8 +90,8 @@ __success __failure_unpriv __retval(0) int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) { + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; const struct nf_hook_state *state = ctx->state; - struct sk_buff *skb = ctx->skb; const struct iphdr *iph; const struct tcphdr *th; u8 buffer_iph[20] = {}; @@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) struct bpf_dynptr ptr; uint8_t ihl; - if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) + if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) return NF_ACCEPT; iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph)); diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c new file mode 100644 index 000000000000..f37713a265ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("or_jmp32_k: bit ops + branch on unknown value") +__failure +__msg("R0 invalid mem access 'scalar'") +__naked void or_jmp32_k(void) +{ + asm volatile (" \ + r0 = 0xffffffff; \ + r0 /= 1; \ + r1 = 0; \ + w1 = -1; \ + w1 >>= 1; \ + w0 &= w1; \ + w0 |= 2; \ + if w0 != 0x7ffffffd goto l1; \ + r0 = 1; \ + exit; \ +l3: \ + r0 = 5; \ + *(u64*)(r0 - 8) = r0; \ + exit; \ +l2: \ + w0 -= 0xe; \ + if w0 == 1 goto l3; \ + r0 = 4; \ + exit; \ +l1: \ + w0 -= 0x7ffffff0; \ + if w0 s>= 0xe goto l2; \ + r0 = 3; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 4a58e0398e72..6a6fad625f7e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -8,8 +8,6 @@ #include "bpf_misc.h" #include <../../../tools/include/linux/filter.h> -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - int vals[] SEC(".data.vals") = {1, 2, 3, 4}; __naked __noinline __used diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c index 49e712acbf60..f8d3ae0c29ae 100644 --- a/tools/testing/selftests/bpf/progs/wq.c +++ b/tools/testing/selftests/bpf/progs/wq.c @@ -32,6 +32,7 @@ struct { } hmap_malloc SEC(".maps"); struct elem { + int ok_offset; struct bpf_wq w; }; @@ -53,7 +54,7 @@ __u32 ok; __u32 ok_sleepable; static int test_elem_callback(void *map, int *key, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq)) + int (callback_fn)(void *map, int *key, void *value)) { struct elem init = {}, *val; struct bpf_wq *wq; @@ -70,6 +71,8 @@ static int test_elem_callback(void *map, int *key, if (!val) return -2; + val->ok_offset = *key; + wq = &val->w; if (bpf_wq_init(wq, map, 0) != 0) return -3; @@ -84,7 +87,7 @@ static int test_elem_callback(void *map, int *key, } static int test_hmap_elem_callback(void *map, int *key, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq)) + int (callback_fn)(void *map, int *key, void *value)) { struct hmap_elem init = {}, *val; struct bpf_wq *wq; @@ -114,7 +117,7 @@ static int test_hmap_elem_callback(void *map, int *key, } /* callback for non sleepable workqueue */ -static int wq_callback(void *map, int *key, struct bpf_wq *work) +static int wq_callback(void *map, int *key, void *value) { bpf_kfunc_common_test(); ok |= (1 << *key); @@ -122,10 +125,16 @@ static int wq_callback(void *map, int *key, struct bpf_wq *work) } /* callback for sleepable workqueue */ -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *value) { + struct elem *data = (struct elem *)value; + int offset = data->ok_offset; + + if (*key != offset) + return 0; + bpf_kfunc_call_test_sleepable(); - ok_sleepable |= (1 << *key); + ok_sleepable |= (1 << offset); return 0; } diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c index 4cbdb425f223..25b51a72fe0f 100644 --- a/tools/testing/selftests/bpf/progs/wq_failures.c +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -28,14 +28,14 @@ struct { } lru SEC(".maps"); /* callback for non sleepable workqueue */ -static int wq_callback(void *map, int *key, struct bpf_wq *work) +static int wq_callback(void *map, int *key, void *value) { bpf_kfunc_common_test(); return 0; } /* callback for sleepable workqueue */ -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *value) { bpf_kfunc_call_test_sleepable(); return 0; diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c new file mode 100644 index 000000000000..7fdc7b23ee74 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86dd +#define IP_MF 0x2000 /* "More Fragments" */ +#define IP_OFFSET 0x1fff /* "Fragment Offset" */ +#define AF_INET 2 +#define AF_INET6 10 + +struct bpf_flowtable_opts___local { + s32 error; +}; + +struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *, + struct bpf_flowtable_opts___local *, u32) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} stats SEC(".maps"); + +static bool xdp_flowtable_offload_check_iphdr(struct iphdr *iph) +{ + /* ip fragmented traffic */ + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return false; + + /* ip options */ + if (iph->ihl * 4 != sizeof(*iph)) + return false; + + if (iph->ttl <= 1) + return false; + + return true; +} + +static bool xdp_flowtable_offload_check_tcp_state(void *ports, void *data_end, + u8 proto) +{ + if (proto == IPPROTO_TCP) { + struct tcphdr *tcph = ports; + + if (tcph + 1 > data_end) + return false; + + if (tcph->fin || tcph->rst) + return false; + } + + return true; +} + +struct flow_ports___local { + __be16 source, dest; +} __attribute__((preserve_access_index)); + +SEC("xdp.frags") +int xdp_flowtable_do_lookup(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + struct bpf_flowtable_opts___local opts = {}; + struct flow_offload_tuple_rhash *tuplehash; + struct bpf_fib_lookup tuple = { + .ifindex = ctx->ingress_ifindex, + }; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + struct flow_ports___local *ports; + __u32 *val, key = 0; + + if (eth + 1 > data_end) + return XDP_DROP; + + switch (eth->h_proto) { + case bpf_htons(ETH_P_IP): { + struct iphdr *iph = data + sizeof(*eth); + + ports = (struct flow_ports___local *)(iph + 1); + if (ports + 1 > data_end) + return XDP_PASS; + + /* sanity check on ip header */ + if (!xdp_flowtable_offload_check_iphdr(iph)) + return XDP_PASS; + + if (!xdp_flowtable_offload_check_tcp_state(ports, data_end, + iph->protocol)) + return XDP_PASS; + + tuple.family = AF_INET; + tuple.tos = iph->tos; + tuple.l4_protocol = iph->protocol; + tuple.tot_len = bpf_ntohs(iph->tot_len); + tuple.ipv4_src = iph->saddr; + tuple.ipv4_dst = iph->daddr; + tuple.sport = ports->source; + tuple.dport = ports->dest; + break; + } + case bpf_htons(ETH_P_IPV6): { + struct in6_addr *src = (struct in6_addr *)tuple.ipv6_src; + struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + ports = (struct flow_ports___local *)(ip6h + 1); + if (ports + 1 > data_end) + return XDP_PASS; + + if (ip6h->hop_limit <= 1) + return XDP_PASS; + + if (!xdp_flowtable_offload_check_tcp_state(ports, data_end, + ip6h->nexthdr)) + return XDP_PASS; + + tuple.family = AF_INET6; + tuple.l4_protocol = ip6h->nexthdr; + tuple.tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + tuple.sport = ports->source; + tuple.dport = ports->dest; + break; + } + default: + return XDP_PASS; + } + + tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts)); + if (!tuplehash) + return XDP_PASS; + + val = bpf_map_lookup_elem(&stats, &key); + if (val) + __sync_add_and_fetch(val, 1); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 7ea9785738b5..f8f5dc9f72b8 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +#define BPF_NO_KFUNC_PROTOTYPES #include "vmlinux.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c index f6a501fbba2b..a1d9f106c3f0 100644 --- a/tools/testing/selftests/bpf/progs/xfrm_info.c +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES #include "vmlinux.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 524c38e9cde4..f14e10b0de96 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #include <linux/capability.h> #include <stdlib.h> +#include <regex.h> #include <test_progs.h> #include <bpf/btf.h> @@ -17,9 +18,11 @@ #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_EXPECT_REGEX_PFX "comment:test_expect_regex=" #define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv" #define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv" #define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv=" +#define TEST_TAG_EXPECT_REGEX_PFX_UNPRIV "comment:test_expect_regex_unpriv=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" #define TEST_TAG_DESCRIPTION_PFX "comment:test_description=" @@ -46,10 +49,16 @@ enum mode { UNPRIV = 2 }; +struct expect_msg { + const char *substr; /* substring match */ + const char *regex_str; /* regex-based match */ + regex_t regex; +}; + struct test_subspec { char *name; bool expect_failure; - const char **expect_msgs; + struct expect_msg *expect_msgs; size_t expect_msg_cnt; int retval; bool execute; @@ -89,6 +98,16 @@ void test_loader_fini(struct test_loader *tester) static void free_test_spec(struct test_spec *spec) { + int i; + + /* Deallocate expect_msgs arrays. */ + for (i = 0; i < spec->priv.expect_msg_cnt; i++) + if (spec->priv.expect_msgs[i].regex_str) + regfree(&spec->priv.expect_msgs[i].regex); + for (i = 0; i < spec->unpriv.expect_msg_cnt; i++) + if (spec->unpriv.expect_msgs[i].regex_str) + regfree(&spec->unpriv.expect_msgs[i].regex); + free(spec->priv.name); free(spec->unpriv.name); free(spec->priv.expect_msgs); @@ -100,18 +119,38 @@ static void free_test_spec(struct test_spec *spec) spec->unpriv.expect_msgs = NULL; } -static int push_msg(const char *msg, struct test_subspec *subspec) +static int push_msg(const char *substr, const char *regex_str, struct test_subspec *subspec) { void *tmp; + int regcomp_res; + char error_msg[100]; + struct expect_msg *msg; - tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *)); + tmp = realloc(subspec->expect_msgs, + (1 + subspec->expect_msg_cnt) * sizeof(struct expect_msg)); if (!tmp) { ASSERT_FAIL("failed to realloc memory for messages\n"); return -ENOMEM; } subspec->expect_msgs = tmp; - subspec->expect_msgs[subspec->expect_msg_cnt++] = msg; + msg = &subspec->expect_msgs[subspec->expect_msg_cnt]; + + if (substr) { + msg->substr = substr; + msg->regex_str = NULL; + } else { + msg->regex_str = regex_str; + msg->substr = NULL; + regcomp_res = regcomp(&msg->regex, regex_str, REG_EXTENDED|REG_NEWLINE); + if (regcomp_res != 0) { + regerror(regcomp_res, &msg->regex, error_msg, sizeof(error_msg)); + PRINT_FAIL("Regexp compilation error in '%s': '%s'\n", + regex_str, error_msg); + return -EINVAL; + } + } + subspec->expect_msg_cnt += 1; return 0; } @@ -233,13 +272,25 @@ static int parse_test_spec(struct test_loader *tester, spec->mode_mask |= UNPRIV; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; - err = push_msg(msg, &spec->priv); + err = push_msg(msg, NULL, &spec->priv); if (err) goto cleanup; spec->mode_mask |= PRIV; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) { msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1; - err = push_msg(msg, &spec->unpriv); + err = push_msg(msg, NULL, &spec->unpriv); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX)) { + msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX) - 1; + err = push_msg(NULL, msg, &spec->priv); + if (err) + goto cleanup; + spec->mode_mask |= PRIV; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX_UNPRIV)) { + msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX_UNPRIV) - 1; + err = push_msg(NULL, msg, &spec->unpriv); if (err) goto cleanup; spec->mode_mask |= UNPRIV; @@ -337,16 +388,13 @@ static int parse_test_spec(struct test_loader *tester, } if (!spec->unpriv.expect_msgs) { - size_t sz = spec->priv.expect_msg_cnt * sizeof(void *); + for (i = 0; i < spec->priv.expect_msg_cnt; i++) { + struct expect_msg *msg = &spec->priv.expect_msgs[i]; - spec->unpriv.expect_msgs = malloc(sz); - if (!spec->unpriv.expect_msgs) { - PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n"); - err = -ENOMEM; - goto cleanup; + err = push_msg(msg->substr, msg->regex_str, &spec->unpriv); + if (err) + goto cleanup; } - memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz); - spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt; } } @@ -402,27 +450,40 @@ static void validate_case(struct test_loader *tester, struct bpf_program *prog, int load_err) { - int i, j; + int i, j, err; + char *match; + regmatch_t reg_match[1]; for (i = 0; i < subspec->expect_msg_cnt; i++) { - char *match; - const char *expect_msg; - - expect_msg = subspec->expect_msgs[i]; + struct expect_msg *msg = &subspec->expect_msgs[i]; + + if (msg->substr) { + match = strstr(tester->log_buf + tester->next_match_pos, msg->substr); + if (match) + tester->next_match_pos = match - tester->log_buf + strlen(msg->substr); + } else { + err = regexec(&msg->regex, + tester->log_buf + tester->next_match_pos, 1, reg_match, 0); + if (err == 0) { + match = tester->log_buf + tester->next_match_pos + reg_match[0].rm_so; + tester->next_match_pos += reg_match[0].rm_eo; + } else { + match = NULL; + } + } - match = strstr(tester->log_buf + tester->next_match_pos, expect_msg); if (!ASSERT_OK_PTR(match, "expect_msg")) { - /* if we are in verbose mode, we've already emitted log */ if (env.verbosity == VERBOSE_NONE) emit_verifier_log(tester->log_buf, true /*force*/); - for (j = 0; j < i; j++) - fprintf(stderr, - "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]); - fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg); + for (j = 0; j <= i; j++) { + msg = &subspec->expect_msgs[j]; + fprintf(stderr, "%s %s: '%s'\n", + j < i ? "MATCHED " : "EXPECTED", + msg->substr ? "SUBSTR" : " REGEX", + msg->substr ?: msg->regex_str); + } return; } - - tester->next_match_pos = match - tester->log_buf + strlen(expect_msg); } } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0ba5a20b19ba..51341d50213b 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -377,6 +377,15 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_OK_FD(fd, name) ({ \ + static int duration = 0; \ + int ___fd = (fd); \ + bool ___ok = ___fd >= 0; \ + CHECK(!___ok, (name), "unexpected fd: %d (errno %d)\n", \ + ___fd, errno); \ + ___ok; \ +}) + #define SYS(goto_label, fmt, ...) \ ({ \ char cmd[1024]; \ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 92752f5eeded..3e02d7267de8 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -63,7 +63,8 @@ int passed; int failed; int map_fd[9]; struct bpf_map *maps[9]; -int prog_fd[11]; +struct bpf_program *progs[9]; +struct bpf_link *links[9]; int txmsg_pass; int txmsg_redir; @@ -680,7 +681,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - s->bytes_recvd += recv; + if (recv > 0) + s->bytes_recvd += recv; if (opt->check_recved_len && s->bytes_recvd > total_bytes) { errno = EMSGSIZE; @@ -952,7 +954,8 @@ enum { static int run_options(struct sockmap_options *options, int cg_fd, int test) { - int i, key, next_key, err, tx_prog_fd = -1, zero = 0; + int i, key, next_key, err, zero = 0; + struct bpf_program *tx_prog; /* If base test skip BPF setup */ if (test == BASE || test == BASE_SENDPAGE) @@ -960,48 +963,44 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to sockmap */ if (!txmsg_omit_skb_parser) { - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { + links[0] = bpf_program__attach_sockmap(progs[0], map_fd[0]); + if (!links[0]) { fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + "ERROR: bpf_program__attach_sockmap (sockmap %i->%i): (%s)\n", + bpf_program__fd(progs[0]), map_fd[0], strerror(errno)); + return -1; } } - err = bpf_prog_attach(prog_fd[1], map_fd[0], - BPF_SK_SKB_STREAM_VERDICT, 0); - if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", - err, strerror(errno)); - return err; + links[1] = bpf_program__attach_sockmap(progs[1], map_fd[0]); + if (!links[1]) { + fprintf(stderr, "ERROR: bpf_program__attach_sockmap (sockmap): (%s)\n", + strerror(errno)); + return -1; } /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { if (!txmsg_omit_skb_parser) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { + links[2] = bpf_program__attach_sockmap(progs[0], map_fd[8]); + if (!links[2]) { fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + "ERROR: bpf_program__attach_sockmap (TLS sockmap %i->%i): (%s)\n", + bpf_program__fd(progs[0]), map_fd[8], strerror(errno)); + return -1; } } - err = bpf_prog_attach(prog_fd[2], map_fd[8], - BPF_SK_SKB_STREAM_VERDICT, 0); - if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n", - err, strerror(errno)); - return err; + links[3] = bpf_program__attach_sockmap(progs[2], map_fd[8]); + if (!links[3]) { + fprintf(stderr, "ERROR: bpf_program__attach_sockmap (TLS sockmap): (%s)\n", + strerror(errno)); + return -1; } } /* Attach to cgroups */ - err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + err = bpf_prog_attach(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (err) { fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", err, strerror(errno)); @@ -1017,30 +1016,31 @@ run: /* Attach txmsg program to sockmap */ if (txmsg_pass) - tx_prog_fd = prog_fd[4]; + tx_prog = progs[4]; else if (txmsg_redir) - tx_prog_fd = prog_fd[5]; + tx_prog = progs[5]; else if (txmsg_apply) - tx_prog_fd = prog_fd[6]; + tx_prog = progs[6]; else if (txmsg_cork) - tx_prog_fd = prog_fd[7]; + tx_prog = progs[7]; else if (txmsg_drop) - tx_prog_fd = prog_fd[8]; + tx_prog = progs[8]; else - tx_prog_fd = 0; + tx_prog = NULL; - if (tx_prog_fd) { - int redir_fd, i = 0; + if (tx_prog) { + int redir_fd; - err = bpf_prog_attach(tx_prog_fd, - map_fd[1], BPF_SK_MSG_VERDICT, 0); - if (err) { + links[4] = bpf_program__attach_sockmap(tx_prog, map_fd[1]); + if (!links[4]) { fprintf(stderr, - "ERROR: bpf_prog_attach (txmsg): %d (%s)\n", - err, strerror(errno)); + "ERROR: bpf_program__attach_sockmap (txmsg): (%s)\n", + strerror(errno)); + err = -1; goto out; } + i = 0; err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY); if (err) { fprintf(stderr, @@ -1279,16 +1279,14 @@ run: fprintf(stderr, "unknown test\n"); out: /* Detatch and zero all the maps */ - bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS); - bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); - bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); - bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER); - bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT); + bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS); - if (tx_prog_fd >= 0) - bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); + for (i = 0; i < ARRAY_SIZE(links); i++) { + if (links[i]) + bpf_link__detach(links[i]); + } - for (i = 0; i < 8; i++) { + for (i = 0; i < ARRAY_SIZE(map_fd); i++) { key = next_key = 0; bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) { @@ -1783,34 +1781,6 @@ char *map_names[] = { "tls_sock_map", }; -int prog_attach_type[] = { - BPF_SK_SKB_STREAM_PARSER, - BPF_SK_SKB_STREAM_VERDICT, - BPF_SK_SKB_STREAM_VERDICT, - BPF_CGROUP_SOCK_OPS, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, -}; - -int prog_type[] = { - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SOCK_OPS, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, -}; - static int populate_progs(char *bpf_file) { struct bpf_program *prog; @@ -1829,17 +1799,10 @@ static int populate_progs(char *bpf_file) return -1; } - bpf_object__for_each_program(prog, obj) { - bpf_program__set_type(prog, prog_type[i]); - bpf_program__set_expected_attach_type(prog, - prog_attach_type[i]); - i++; - } - i = bpf_object__load(obj); i = 0; bpf_object__for_each_program(prog, obj) { - prog_fd[i] = bpf_program__fd(prog); + progs[i] = prog; i++; } @@ -1853,6 +1816,9 @@ static int populate_progs(char *bpf_file) } } + for (i = 0; i < ARRAY_SIZE(links); i++) + links[i] = NULL; + return 0; } @@ -1970,7 +1936,6 @@ static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt) static int test_selftest(int cg_fd, struct sockmap_options *opt) { - test_selftests_sockmap(cg_fd, opt); test_selftests_sockhash(cg_fd, opt); test_selftests_ktls(cg_fd, opt); diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 7b5fc98838cd..3844f9b8232a 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -139,14 +139,14 @@ out: return ret; } -static int v6only_true(int fd, const struct post_socket_opts *opts) +static int v6only_true(int fd, void *opts) { int mode = true; return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); } -static int v6only_false(int fd, const struct post_socket_opts *opts) +static int v6only_false(int fd, void *opts) { int mode = false; @@ -156,10 +156,6 @@ static int v6only_false(int fd, const struct post_socket_opts *opts) int main(int argc, char **argv) { struct network_helper_opts opts = { 0 }; - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; - struct sockaddr_in addr4dual; - struct sockaddr_in6 addr6dual; int server = -1; int server_v6 = -1; int server_dual = -1; @@ -181,36 +177,17 @@ int main(int argc, char **argv) goto err; } - memset(&addr4, 0, sizeof(addr4)); - addr4.sin_family = AF_INET; - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr4.sin_port = 0; - memcpy(&addr4dual, &addr4, sizeof(addr4dual)); - - memset(&addr6, 0, sizeof(addr6)); - addr6.sin6_family = AF_INET6; - addr6.sin6_addr = in6addr_loopback; - addr6.sin6_port = 0; - - memset(&addr6dual, 0, sizeof(addr6dual)); - addr6dual.sin6_family = AF_INET6; - addr6dual.sin6_addr = in6addr_any; - addr6dual.sin6_port = 0; - - server = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr4, - sizeof(addr4), NULL); + server = start_server_str(AF_INET, SOCK_STREAM, "127.0.0.1", 0, NULL); if (server == -1) goto err; opts.post_socket_cb = v6only_true; - server_v6 = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6, - sizeof(addr6), &opts); + server_v6 = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts); if (server_v6 == -1) goto err; opts.post_socket_cb = v6only_false; - server_dual = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6dual, - sizeof(addr6dual), &opts); + server_dual = start_server_str(AF_INET6, SOCK_STREAM, "::0", 0, &opts); if (server_dual == -1) goto err; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index df04bda1c927..610392dfc4fb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1237,11 +1237,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); } -struct libcap { - struct __user_cap_header_struct hdr; - struct __user_cap_data_struct data[2]; -}; - static int set_admin(bool admin) { int err; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 70e29f316fe7..465d196c7165 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -211,7 +211,7 @@ long ksym_get_addr(const char *name) */ int kallsyms_find(const char *sym, unsigned long long *addr) { - char type, name[500]; + char type, name[500], *match; unsigned long long value; int err = 0; FILE *f; @@ -221,6 +221,17 @@ int kallsyms_find(const char *sym, unsigned long long *addr) return -EINVAL; while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function + * symbols could be promoted to global due to cross-file inlining. + * For such cases, clang compiler will add .llvm.<hash> suffix + * to those symbols to avoid potential naming conflict. + * Let us ignore .llvm.<hash> suffix during symbol comparison. + */ + if (type == 'd') { + match = strstr(name, ".llvm."); + if (match) + *match = '\0'; + } if (strcmp(name, sym) == 0) { *addr = value; goto out; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index ab25a81fd3a1..d0cdd156cd55 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc", + .errstr = "arg#0 expected pointer to ctx, but got PTR", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, @@ -276,6 +276,19 @@ .result = ACCEPT, }, { + "calls: invalid kfunc call: must provide (attach_prog_fd, btf_id) pair when freplace", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_EXT, + .result = REJECT, + .errstr = "Tracing programs must provide btf_id", + .fixup_kfunc_btf_id = { + { "bpf_dynptr_from_skb", 0 }, + }, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0a9293a57211..90643ccc221d 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -39,12 +39,12 @@ .result = VERBOSE_ACCEPT, .errstr = "mark_precise: frame0: last_idx 26 first_idx 20\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: regs=r2,r9 stack= before 20\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ mark_precise: frame0: regs=r2,r9 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ @@ -100,11 +100,11 @@ .errstr = "26: (85) call bpf_probe_read_kernel#113\ mark_precise: frame0: last_idx 26 first_idx 22\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ mark_precise: frame0: regs=r2,r9 stack= before 20\ mark_precise: frame0: parent state regs=r2,r9 stack=:\ diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 2eac0895b0a1..8144fd145237 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -196,6 +196,12 @@ static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem }; int ret; + if (umem->fill_size) + cfg.fill_size = umem->fill_size; + + if (umem->comp_size) + cfg.comp_size = umem->comp_size; + if (umem->unaligned_mode) cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -265,6 +271,10 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i cfg.bind_flags |= XDP_SHARED_UMEM; if (ifobject->mtu > MAX_ETH_PKT_SIZE) cfg.bind_flags |= XDP_USE_SG; + if (umem->comp_size) + cfg.tx_size = umem->comp_size; + if (umem->fill_size) + cfg.rx_size = umem->fill_size; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; @@ -1616,7 +1626,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) buffers_to_fill = umem->num_frames; else - buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; + buffers_to_fill = umem->fill_size; ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); if (ret != buffers_to_fill) @@ -1899,11 +1909,15 @@ static int testapp_validate_traffic(struct test_spec *test) } if (test->set_ring) { - if (ifobj_tx->hw_ring_size_supp) - return set_ring_size(ifobj_tx); - - ksft_test_result_skip("Changing HW ring size not supported.\n"); - return TEST_SKIP; + if (ifobj_tx->hw_ring_size_supp) { + if (set_ring_size(ifobj_tx)) { + ksft_test_result_skip("Failed to change HW ring size.\n"); + return TEST_FAILURE; + } + } else { + ksft_test_result_skip("Changing HW ring size not supported.\n"); + return TEST_SKIP; + } } xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); @@ -2441,7 +2455,7 @@ static int testapp_hw_sw_min_ring_size(struct test_spec *test) static int testapp_hw_sw_max_ring_size(struct test_spec *test) { - u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; + u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4; int ret; test->set_ring = true; @@ -2449,7 +2463,8 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending; test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending; test->ifobj_rx->umem->num_frames = max_descs; - test->ifobj_rx->xsk->rxqsize = max_descs; + test->ifobj_rx->umem->fill_size = max_descs; + test->ifobj_rx->umem->comp_size = max_descs; test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; @@ -2457,9 +2472,12 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) if (ret) return ret; - /* Set batch_size to 4095 */ - test->ifobj_tx->xsk->batch_size = max_descs - 1; - test->ifobj_rx->xsk->batch_size = max_descs - 1; + /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when + * updating the Rx HW tail register. + */ + test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + pkt_stream_replace(test, max_descs, MIN_PKT_SIZE); return testapp_validate_traffic(test); } diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 906de5fab7a3..885c948c5d83 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -80,6 +80,8 @@ struct xsk_umem_info { void *buffer; u32 frame_size; u32 base_addr; + u32 fill_size; + u32 comp_size; bool unaligned_mode; }; diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index b8703c499d28..dfec31fb9b30 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -130,7 +130,6 @@ int run_test(int cpu) void suspend(void) { int power_state_fd; - struct sigevent event = {}; int timerfd; int err; struct itimerspec spec = {}; diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 2732e0b29271..952e4448bf07 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -1,11 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -test_memcontrol test_core -test_freezer -test_kmem -test_kill test_cpu test_cpuset -test_zswap +test_freezer test_hugetlb_memcg +test_kill +test_kmem +test_memcontrol +test_pids +test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 16461dc0ffdf..1b897152bab6 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -6,26 +6,29 @@ all: ${HELPER_PROGS} TEST_FILES := with_stress.sh TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh TEST_GEN_FILES := wait_inotify -TEST_GEN_PROGS = test_memcontrol -TEST_GEN_PROGS += test_kmem -TEST_GEN_PROGS += test_core -TEST_GEN_PROGS += test_freezer -TEST_GEN_PROGS += test_kill +# Keep the lists lexicographically sorted +TEST_GEN_PROGS = test_core TEST_GEN_PROGS += test_cpu TEST_GEN_PROGS += test_cpuset -TEST_GEN_PROGS += test_zswap +TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_hugetlb_memcg +TEST_GEN_PROGS += test_kill +TEST_GEN_PROGS += test_kmem +TEST_GEN_PROGS += test_memcontrol +TEST_GEN_PROGS += test_pids +TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c -$(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c $(OUTPUT)/test_cpuset: cgroup_util.c -$(OUTPUT)/test_zswap: cgroup_util.c +$(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_hugetlb_memcg: cgroup_util.c +$(OUTPUT)/test_kill: cgroup_util.c +$(OUTPUT)/test_kmem: cgroup_util.c +$(OUTPUT)/test_memcontrol: cgroup_util.c +$(OUTPUT)/test_pids: cgroup_util.c +$(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config index 97d549ee894f..39f979690dd3 100644 --- a/tools/testing/selftests/cgroup/config +++ b/tools/testing/selftests/cgroup/config @@ -3,5 +3,4 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_SCHED=y CONFIG_MEMCG=y -CONFIG_MEMCG_KMEM=y CONFIG_PAGE_COUNTER=y diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index b5eb1be2248c..7c08cc153367 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -28,6 +28,14 @@ CPULIST=$(cat $CGROUP2/cpuset.cpus.effective) NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//") [[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" +# Check to see if /dev/console exists and is writable +if [[ -c /dev/console && -w /dev/console ]] +then + CONSOLE=/dev/console +else + CONSOLE=/dev/null +fi + # Set verbose flag and delay factor PROG=$1 VERBOSE=0 @@ -103,8 +111,8 @@ console_msg() { MSG=$1 echo "$MSG" - echo "" > /dev/console - echo "$MSG" > /dev/console + echo "" > $CONSOLE + echo "$MSG" > $CONSOLE pause 0.01 } @@ -161,6 +169,14 @@ test_add_proc() # T = put a task into cgroup # O<c>=<v> = Write <v> to CPU online file of <c> # +# ECPUs - effective CPUs of cpusets +# Pstate - partition root state +# ISOLCPUS - isolated CPUs (<icpus>[,<icpus2>]) +# +# Note that if there are 2 fields in ISOLCPUS, the first one is for +# sched-debug matching which includes offline CPUs and single-CPU partitions +# while the second one is for matching cpuset.cpus.isolated. +# SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" TEST_MATRIX=( # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS @@ -220,23 +236,29 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" # Nested remote/local partition tests " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ A1:P0,A2:P1,A3:P2,B1:P1 2-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ + A1:P0,A2:P1,A3:P0,B1:P1" " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ A1:P0,A2:P2,A3:P1 2-4,2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ + A1:P0,A2:P2,A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ A1:P0,A2:P-2,A3:P-1" @@ -262,8 +284,8 @@ TEST_MATRIX=( . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 C2-3:X1:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 C2-3:X1:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" @@ -274,32 +296,26 @@ TEST_MATRIX=( " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" # Local partition CPU change tests " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ + . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P-2" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ + . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ A1:P0,A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS @@ -346,6 +362,9 @@ TEST_MATRIX=( " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it + " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: @@ -355,6 +374,9 @@ TEST_MATRIX=( # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + + # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" ) # @@ -556,14 +578,15 @@ check_cgroup_states() do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 + CGRP_DIR=$CGRP STATE=$2 FILE= EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 + [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 case $STATE in - P*) FILE=$CGRP/cpuset.cpus.partition + P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; *) echo "Unknown state: $STATE!" exit 1 @@ -587,6 +610,16 @@ check_cgroup_states() ;; esac [[ $EVAL != $VAL ]] && return 1 + + # + # For root partition, dump sched-domains info to console if + # verbose mode set for manual comparison with sched debug info. + # + [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { + DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) + [[ -n "$DOMS" ]] && + echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + } done return 0 } @@ -694,9 +727,9 @@ null_isolcpus_check() [[ $VERBOSE -gt 0 ]] || return 0 # Retry a few times before printing error RETRY=0 - while [[ $RETRY -lt 5 ]] + while [[ $RETRY -lt 8 ]] do - pause 0.01 + pause 0.02 check_isolcpus "." [[ $? -eq 0 ]] && return 0 ((RETRY++)) @@ -726,7 +759,7 @@ run_state_test() while [[ $I -lt $CNT ]] do - echo "Running test $I ..." > /dev/console + echo "Running test $I ..." > $CONSOLE [[ $VERBOSE -gt 1 ]] && { echo "" eval echo \${$TEST[$I]} @@ -783,7 +816,7 @@ run_state_test() while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] do # Wait a bit longer & recheck a few times - pause 0.01 + pause 0.02 ((RETRY++)) NEWLIST=$(cat cpuset.cpus.effective) done diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c new file mode 100644 index 000000000000..9ecb83c6cc5c --- /dev/null +++ b/tools/testing/selftests/cgroup/test_pids.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/limits.h> +#include <signal.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int run_success(const char *cgroup, void *arg) +{ + return 0; +} + +static int run_pause(const char *cgroup, void *arg) +{ + return pause(); +} + +/* + * This test checks that pids.max prevents forking new children above the + * specified limit in the cgroup. + */ +static int test_pids_max(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_pids; + int pid; + + cg_pids = cg_name(root, "pids_test"); + if (!cg_pids) + goto cleanup; + + if (cg_create(cg_pids)) + goto cleanup; + + if (cg_read_strcmp(cg_pids, "pids.max", "max\n")) + goto cleanup; + + if (cg_write(cg_pids, "pids.max", "2")) + goto cleanup; + + if (cg_enter_current(cg_pids)) + goto cleanup; + + pid = cg_run_nowait(cg_pids, run_pause, NULL); + if (pid < 0) + goto cleanup; + + if (cg_run_nowait(cg_pids, run_success, NULL) != -1 || errno != EAGAIN) + goto cleanup; + + if (kill(pid, SIGINT)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + cg_destroy(cg_pids); + free(cg_pids); + + return ret; +} + +/* + * This test checks that pids.events are counted in cgroup associated with pids.max + */ +static int test_pids_events(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_parent = NULL, *cg_child = NULL; + int pid; + + cg_parent = cg_name(root, "pids_parent"); + cg_child = cg_name(cg_parent, "pids_child"); + if (!cg_parent || !cg_child) + goto cleanup; + + if (cg_create(cg_parent)) + goto cleanup; + if (cg_write(cg_parent, "cgroup.subtree_control", "+pids")) + goto cleanup; + if (cg_create(cg_child)) + goto cleanup; + + if (cg_write(cg_parent, "pids.max", "2")) + goto cleanup; + + if (cg_read_strcmp(cg_child, "pids.max", "max\n")) + goto cleanup; + + if (cg_enter_current(cg_child)) + goto cleanup; + + pid = cg_run_nowait(cg_child, run_pause, NULL); + if (pid < 0) + goto cleanup; + + if (cg_run_nowait(cg_child, run_success, NULL) != -1 || errno != EAGAIN) + goto cleanup; + + if (kill(pid, SIGINT)) + goto cleanup; + + if (cg_read_key_long(cg_child, "pids.events", "max ") != 0) + goto cleanup; + if (cg_read_key_long(cg_parent, "pids.events", "max ") != 1) + goto cleanup; + + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_child) + cg_destroy(cg_child); + if (cg_parent) + cg_destroy(cg_parent); + free(cg_child); + free(cg_parent); + + return ret; +} + + + +#define T(x) { x, #x } +struct pids_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_pids_max), + T(test_pids_events), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); + if (cg_find_unified_root(root, sizeof(root), NULL)) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that pids controller is available: + * pids is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "pids")) + ksft_exit_skip("pids controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "pids")) + if (cg_write(root, "cgroup.subtree_control", "+pids")) + ksft_exit_skip("Failed to set pids controller\n"); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + ksft_finished(); +} diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 29a22f50e762..1e2e98cc809d 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,7 +4,7 @@ TEST_GEN_FILES += huge_count_read_write TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race TEST_GEN_FILES += debugfs_target_ids_pid_leak -TEST_GEN_FILES += access_memory +TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _chk_dependency.sh _debugfs_common.sh @@ -13,6 +13,7 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh TEST_PROGS += sysfs.sh TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py +TEST_PROGS += damos_tried_regions.py damon_nr_regions.py TEST_PROGS += reclaim.sh lru_sort.sh # regression tests (reproducers of previously found bugs) diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index 2bd44c32be1b..6e136dc3df19 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -175,16 +175,24 @@ class DamosStats: self.sz_applied = sz_applied self.qt_exceeds = qt_exceeds +class DamosTriedRegion: + def __init__(self, start, end, nr_accesses, age): + self.start = start + self.end = end + self.nr_accesses = nr_accesses + self.age = age + class Damos: action = None access_pattern = None quota = None apply_interval_us = None - # todo: Support watermarks, stats, tried_regions + # todo: Support watermarks, stats idx = None context = None tried_bytes = None stats = None + tried_regions = None def __init__(self, action='stat', access_pattern=DamosAccessPattern(), quota=DamosQuota(), apply_interval_us=0): @@ -398,6 +406,35 @@ class Kdamond: err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') return err + def stop(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'off') + return err + + def update_schemes_tried_regions(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_tried_regions') + if err is not None: + return err + for context in self.contexts: + for scheme in context.schemes: + tried_regions = [] + tried_regions_dir = os.path.join( + scheme.sysfs_dir(), 'tried_regions') + for filename in os.listdir( + os.path.join(scheme.sysfs_dir(), 'tried_regions')): + tried_region_dir = os.path.join(tried_regions_dir, filename) + if not os.path.isdir(tried_region_dir): + continue + region_values = [] + for f in ['start', 'end', 'nr_accesses', 'age']: + content, err = read_file( + os.path.join(tried_region_dir, f)) + if err is not None: + return err + region_values.append(int(content)) + tried_regions.append(DamosTriedRegion(*region_values)) + scheme.tried_regions = tried_regions + def update_schemes_tried_bytes(self): err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'update_schemes_tried_bytes') @@ -444,6 +481,25 @@ class Kdamond: goal.effective_bytes = int(content) return None + def commit(self): + nr_contexts_file = os.path.join(self.sysfs_dir(), + 'contexts', 'nr_contexts') + content, err = read_file(nr_contexts_file) + if err is not None: + return err + if int(content) != len(self.contexts): + err = write_file(nr_contexts_file, '%d' % len(self.contexts)) + if err is not None: + return err + + for context in self.contexts: + err = context.stage() + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'commit') + return err + + def commit_schemes_quota_goals(self): for context in self.contexts: for scheme in context.schemes: @@ -478,3 +534,10 @@ class Kdamonds: if err is not None: return err return None + + def stop(self): + for kdamond in self.kdamonds: + err = kdamond.stop() + if err is not None: + return err + return None diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c index 585a2fa54329..56b17e8fe1be 100644 --- a/tools/testing/selftests/damon/access_memory.c +++ b/tools/testing/selftests/damon/access_memory.c @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) start_clock = clock(); while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC < access_time_ms) - memset(regions[i], i, 1024 * 1024 * 10); + memset(regions[i], i, sz_region); } return 0; } diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c new file mode 100644 index 000000000000..3be121487432 --- /dev/null +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Artificial memory access program for testing DAMON. + * + * Receives number of regions and size of each region from user. Allocate the + * regions and repeatedly access even numbered (starting from zero) regions. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +int main(int argc, char *argv[]) +{ + char **regions; + clock_t start_clock; + int nr_regions; + int sz_region; + int access_time_ms; + int i; + + if (argc != 3) { + printf("Usage: %s <number> <size (bytes)>\n", argv[0]); + return -1; + } + + nr_regions = atoi(argv[1]); + sz_region = atoi(argv[2]); + + regions = malloc(sizeof(*regions) * nr_regions); + for (i = 0; i < nr_regions; i++) + regions[i] = malloc(sz_region); + + while (1) { + for (i = 0; i < nr_regions; i++) { + if (i % 2 == 0) + memset(regions[i], i, sz_region); + } + } + return 0; +} diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py new file mode 100644 index 000000000000..2e8a74aff543 --- /dev/null +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): + ''' + Create process of the given 'real_nr_regions' regions, monitor it using + DAMON with given '{min,max}_nr_regions' monitoring parameter. + + Exit with non-zero return code if the given {min,max}_nr_regions is not + kept. + ''' + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory_even', '%d' % real_nr_regions, + '%d' % sz_region]) + + # stat every monitored regions + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + min_nr_regions=min_nr_regions, + max_nr_regions=max_nr_regions), + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos(action='stat', + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err is not None: + proc.terminate() + print('kdamond start failed: %s' % err) + exit(1) + + collected_nr_regions = [] + while proc.poll() is None: + time.sleep(0.1) + err = kdamonds.kdamonds[0].update_schemes_tried_regions() + if err is not None: + proc.terminate() + print('tried regions update failed: %s' % err) + exit(1) + + scheme = kdamonds.kdamonds[0].contexts[0].schemes[0] + if scheme.tried_regions is None: + proc.terminate() + print('tried regions is not collected') + exit(1) + + nr_tried_regions = len(scheme.tried_regions) + if nr_tried_regions <= 0: + proc.terminate() + print('tried regions is not created') + exit(1) + collected_nr_regions.append(nr_tried_regions) + if len(collected_nr_regions) > 10: + break + proc.terminate() + kdamonds.stop() + + test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( + real_nr_regions, min_nr_regions, max_nr_regions) + if (collected_nr_regions[0] < min_nr_regions or + collected_nr_regions[-1] > max_nr_regions): + print('fail %s' % test_name) + print('number of regions that collected are:') + for nr in collected_nr_regions: + print(nr) + exit(1) + print('pass %s ' % test_name) + +def main(): + # test min_nr_regions larger than real nr regions + test_nr_regions(10, 20, 100) + + # test max_nr_regions smaller than real nr regions + test_nr_regions(15, 3, 10) + + # test online-tuned max_nr_regions that smaller than real nr regions + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region]) + + # stat every monitored regions + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + monitoring_attrs=_damon_sysfs.DamonAttrs( + min_nr_regions=10, max_nr_regions=1000), + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos(action='stat', + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err is not None: + proc.terminate() + print('kdamond start failed: %s' % err) + exit(1) + + # wait until the real regions are found + time.sleep(3) + + attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs + attrs.min_nr_regions = 3 + attrs.max_nr_regions = 7 + err = kdamonds.kdamonds[0].commit() + if err is not None: + proc.terminate() + print('commit failed: %s' % err) + exit(1) + # wait for next merge operation is executed + time.sleep(0.3) + + err = kdamonds.kdamonds[0].update_schemes_tried_regions() + if err is not None: + proc.terminate() + print('tried regions update failed: %s' % err) + exit(1) + + scheme = kdamonds.kdamonds[0].contexts[0].schemes[0] + if scheme.tried_regions is None: + proc.terminate() + print('tried regions is not collected') + exit(1) + + nr_tried_regions = len(scheme.tried_regions) + if nr_tried_regions <= 0: + proc.terminate() + print('tried regions is not created') + exit(1) + proc.terminate() + + if nr_tried_regions > 7: + print('fail online-tuned max_nr_regions: %d > 7' % nr_tried_regions) + exit(1) + print('pass online-tuned max_nr_regions') + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/damos_tried_regions.py b/tools/testing/selftests/damon/damos_tried_regions.py new file mode 100644 index 000000000000..3b347eb28bd2 --- /dev/null +++ b/tools/testing/selftests/damon/damos_tried_regions.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + # repeatedly access even-numbered ones in 14 regions of 10 MiB size + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region]) + + # stat every monitored regions + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos(action='stat', + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err is not None: + proc.terminate() + print('kdamond start failed: %s' % err) + exit(1) + + collected_nr_regions = [] + while proc.poll() is None: + time.sleep(0.1) + err = kdamonds.kdamonds[0].update_schemes_tried_regions() + if err is not None: + proc.terminate() + print('tried regions update failed: %s' % err) + exit(1) + + scheme = kdamonds.kdamonds[0].contexts[0].schemes[0] + if scheme.tried_regions is None: + proc.terminate() + print('tried regions is not collected') + exit(1) + + nr_tried_regions = len(scheme.tried_regions) + if nr_tried_regions <= 0: + proc.terminate() + print('tried regions is not created') + exit(1) + collected_nr_regions.append(nr_tried_regions) + if len(collected_nr_regions) > 10: + break + proc.terminate() + + collected_nr_regions.sort() + sample = collected_nr_regions[4] + print('50-th percentile nr_regions: %d' % sample) + print('expectation (>= 14) is %s' % 'met' if sample >= 14 else 'not met') + if collected_nr_regions[4] < 14: + print('full nr_regions:') + print('\n'.join(collected_nr_regions)) + exit(1) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/devices/Makefile b/tools/testing/selftests/devices/Makefile deleted file mode 100644 index ca29249b30c3..000000000000 --- a/tools/testing/selftests/devices/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -TEST_PROGS := test_discoverable_devices.py -TEST_FILES := boards ksft.py - -include ../lib.mk diff --git a/tools/testing/selftests/devices/error_logs/Makefile b/tools/testing/selftests/devices/error_logs/Makefile new file mode 100644 index 000000000000..d546c3fb0a7f --- /dev/null +++ b/tools/testing/selftests/devices/error_logs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := test_device_error_logs.py + +include ../../lib.mk diff --git a/tools/testing/selftests/devices/error_logs/test_device_error_logs.py b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py new file mode 100755 index 000000000000..3dd56c8ec92c --- /dev/null +++ b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2024 Collabora Ltd +# +# This test checks for the presence of error (or more critical) log messages +# coming from devices in the kernel log. +# +# One failed test case is reported for each device that has outputted error +# logs. Devices with no errors do not produce a passing test case to avoid +# polluting the results, therefore a successful run will list 0 tests run. +# + +import glob +import os +import re +import sys + +# Allow ksft module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../../kselftest/")) + +import ksft + +kmsg = "/dev/kmsg" + +RE_log = re.compile( + r"(?P<prefix>[0-9]+),(?P<sequence>[0-9]+),(?P<timestamp>[0-9]+),(?P<flag>[^;]*)(,[^;]*)*;(?P<message>.*)" +) +RE_tag = re.compile(r" (?P<key>[^=]+)=(?P<value>.*)") + +PREFIX_ERROR = 3 + +logs = [] +error_log_per_device = {} + + +def parse_kmsg(): + current_log = {} + + with open(kmsg) as f: + os.set_blocking(f.fileno(), False) + + for line in f: + tag_line = RE_tag.match(line) + log_line = RE_log.match(line) + + if log_line: + if current_log: + logs.append(current_log) # Save last log + + current_log = { + "prefix": int(log_line.group("prefix")), + "sequence": int(log_line.group("sequence")), + "timestamp": int(log_line.group("timestamp")), + "flag": log_line.group("flag"), + "message": log_line.group("message"), + } + elif tag_line: + current_log[tag_line.group("key")] = tag_line.group("value") + + +def generate_per_device_error_log(): + for log in logs: + if log.get("DEVICE") and log["prefix"] <= PREFIX_ERROR: + if not error_log_per_device.get(log["DEVICE"]): + error_log_per_device[log["DEVICE"]] = [] + error_log_per_device[log["DEVICE"]].append(log) + + +parse_kmsg() + +generate_per_device_error_log() +num_tests = len(error_log_per_device) + +ksft.print_header() +ksft.set_plan(num_tests) + +for device in error_log_per_device: + for log in error_log_per_device[device]: + ksft.print_msg(log["message"]) + ksft.test_result_fail(device) +if num_tests == 0: + ksft.print_msg("No device error logs found") +ksft.finished() diff --git a/tools/testing/selftests/devices/probe/Makefile b/tools/testing/selftests/devices/probe/Makefile new file mode 100644 index 000000000000..f630108c3fdf --- /dev/null +++ b/tools/testing/selftests/devices/probe/Makefile @@ -0,0 +1,4 @@ +TEST_PROGS := test_discoverable_devices.py +TEST_FILES := boards + +include ../../lib.mk diff --git a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml index ff932eb19f0b..ff932eb19f0b 100644 --- a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml +++ b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml diff --git a/tools/testing/selftests/devices/boards/google,spherion.yaml b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml index 17157ecd8c14..3ea843324797 100644 --- a/tools/testing/selftests/devices/boards/google,spherion.yaml +++ b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml @@ -11,6 +11,10 @@ # this, several optional keys can be used: # - dt-mmio: identify the MMIO address of the controller as defined in the # Devicetree. +# - of-fullname-regex: regular expression to match against the OF_FULLNAME +# property. Useful when the controller's address is not unique across other +# sibling controllers. In this case, dt-mmio can't be used, and this property +# allows the matching to include parent nodes as well to make it unique. # - usb-version: for USB controllers to differentiate between USB3 and USB2 # buses sharing the same controller. # - acpi-uid: _UID property of the controller as supplied by the ACPI. Useful to diff --git a/tools/testing/selftests/devices/test_discoverable_devices.py b/tools/testing/selftests/devices/probe/test_discoverable_devices.py index fbae8deb593d..d94a74b8a054 100755 --- a/tools/testing/selftests/devices/test_discoverable_devices.py +++ b/tools/testing/selftests/devices/probe/test_discoverable_devices.py @@ -14,13 +14,19 @@ # the description and examples of the file structure and vocabulary. # +import argparse import glob -import ksft import os import re import sys import yaml +# Allow ksft module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../../kselftest/")) + +import ksft + pci_controllers = [] usb_controllers = [] @@ -63,6 +69,22 @@ def get_dt_mmio(sysfs_dev_dir): sysfs_dev_dir = os.path.dirname(sysfs_dev_dir) +def get_of_fullname(sysfs_dev_dir): + re_of_fullname = re.compile("OF_FULLNAME=(.*)") + of_full_name = None + + # PCI controllers' sysfs don't have an of_node, so have to read it from the + # parent + while not of_full_name: + try: + with open(os.path.join(sysfs_dev_dir, "uevent")) as f: + of_fullname = re_of_fullname.search(f.read()).group(1) + return of_fullname + except: + pass + sysfs_dev_dir = os.path.dirname(sysfs_dev_dir) + + def get_acpi_uid(sysfs_dev_dir): with open(os.path.join(sysfs_dev_dir, "firmware_node", "uid")) as f: return f.read() @@ -96,6 +118,11 @@ def find_controller_in_sysfs(controller, parent_sysfs=None): if str(controller["dt-mmio"]) != get_dt_mmio(c): continue + if controller.get("of-fullname-regex"): + re_of_fullname = re.compile(str(controller["of-fullname-regex"])) + if not re_of_fullname.match(get_of_fullname(c)): + continue + if controller.get("usb-version"): if controller["usb-version"] != get_usb_version(c): continue @@ -194,6 +221,9 @@ def generate_pathname(device): if device.get("dt-mmio"): pathname += "@" + str(device["dt-mmio"]) + if device.get("of-fullname-regex"): + pathname += "-" + str(device["of-fullname-regex"]) + if device.get("name"): pathname = pathname + "/" + device["name"] @@ -296,14 +326,24 @@ def run_test(yaml_file): parse_device_tree_node(device_tree) +parser = argparse.ArgumentParser() +parser.add_argument( + "--boards-dir", default="boards", help="Directory containing the board YAML files" +) +args = parser.parse_args() + find_pci_controller_dirs() find_usb_controller_dirs() ksft.print_header() +if not os.path.exists(args.boards_dir): + ksft.print_msg(f"Boards directory '{args.boards_dir}' doesn't exist") + ksft.exit_fail() + board_file = "" for board_filename in get_board_filenames(): - full_board_filename = os.path.join("boards", board_filename + ".yaml") + full_board_filename = os.path.join(args.boards_dir, board_filename + ".yaml") if os.path.exists(full_board_filename): board_file = full_board_filename diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 5c997f17fcbd..b12f1f9babf8 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -33,7 +33,6 @@ int main(int argc, char **argv) int granule = 1; int cmd = DMA_MAP_BENCHMARK; - char *p; while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { switch (opt) { diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index c812080e304e..6062723a172e 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -9,52 +9,162 @@ #include <errno.h> #include <fcntl.h> #include <malloc.h> +#include <stdbool.h> #include <sys/ioctl.h> #include <sys/syscall.h> +#include <sys/mman.h> #include <linux/memfd.h> #include <linux/udmabuf.h> +#include "../../kselftest.h" #define TEST_PREFIX "drivers/dma-buf/udmabuf" #define NUM_PAGES 4 +#define NUM_ENTRIES 4 +#define MEMFD_SIZE 1024 /* in pages */ -static int memfd_create(const char *name, unsigned int flags) +static unsigned int page_size; + +static int create_memfd_with_seals(off64_t size, bool hpage) +{ + int memfd, ret; + unsigned int flags = MFD_ALLOW_SEALING; + + if (hpage) + flags |= MFD_HUGETLB; + + memfd = memfd_create("udmabuf-test", flags); + if (memfd < 0) { + ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(KSFT_SKIP); + } + + ret = ftruncate(memfd, size); + if (ret == -1) { + ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return memfd; +} + +static int create_udmabuf_list(int devfd, int memfd, off64_t memfd_size) +{ + struct udmabuf_create_list *list; + int ubuf_fd, i; + + list = malloc(sizeof(struct udmabuf_create_list) + + sizeof(struct udmabuf_create_item) * NUM_ENTRIES); + if (!list) { + ksft_print_msg("%s: [FAIL, udmabuf-malloc]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + for (i = 0; i < NUM_ENTRIES; i++) { + list->list[i].memfd = memfd; + list->list[i].offset = i * (memfd_size / NUM_ENTRIES); + list->list[i].size = getpagesize() * NUM_PAGES; + } + + list->count = NUM_ENTRIES; + list->flags = UDMABUF_FLAGS_CLOEXEC; + ubuf_fd = ioctl(devfd, UDMABUF_CREATE_LIST, list); + free(list); + if (ubuf_fd < 0) { + ksft_print_msg("%s: [FAIL, udmabuf-create]\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return ubuf_fd; +} + +static void write_to_memfd(void *addr, off64_t size, char chr) +{ + int i; + + for (i = 0; i < size / page_size; i++) { + *((char *)addr + (i * page_size)) = chr; + } +} + +static void *mmap_fd(int fd, off64_t size) +{ + void *addr; + + addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + ksft_print_msg("%s: ubuf_fd mmap fail\n", TEST_PREFIX); + exit(KSFT_FAIL); + } + + return addr; +} + +static int compare_chunks(void *addr1, void *addr2, off64_t memfd_size) { - return syscall(__NR_memfd_create, name, flags); + off64_t off; + int i = 0, j, k = 0, ret = 0; + char char1, char2; + + while (i < NUM_ENTRIES) { + off = i * (memfd_size / NUM_ENTRIES); + for (j = 0; j < NUM_PAGES; j++, k++) { + char1 = *((char *)addr1 + off + (j * getpagesize())); + char2 = *((char *)addr2 + (k * getpagesize())); + if (char1 != char2) { + ret = -1; + goto err; + } + } + i++; + } +err: + munmap(addr1, memfd_size); + munmap(addr2, NUM_ENTRIES * NUM_PAGES * getpagesize()); + return ret; } int main(int argc, char *argv[]) { struct udmabuf_create create; int devfd, memfd, buf, ret; - off_t size; - void *mem; + off64_t size; + void *addr1, *addr2; + + ksft_print_header(); + ksft_set_plan(6); devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { - printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); - exit(77); + ksft_print_msg( + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + exit(KSFT_SKIP); } memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); if (memfd < 0) { - printf("%s: [skip,no-memfd]\n", TEST_PREFIX); - exit(77); + ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(KSFT_SKIP); } ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); if (ret < 0) { - printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); - exit(77); + ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(KSFT_SKIP); } - size = getpagesize() * NUM_PAGES; ret = ftruncate(memfd, size); if (ret == -1) { - printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); - exit(1); + ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(KSFT_FAIL); } memset(&create, 0, sizeof(create)); @@ -64,44 +174,86 @@ int main(int argc, char *argv[]) create.offset = getpagesize()/2; create.size = getpagesize(); buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf >= 0) { - printf("%s: [FAIL,test-1]\n", TEST_PREFIX); - exit(1); - } + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-1]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-1]\n", TEST_PREFIX); /* should fail (size not multiple of page) */ create.memfd = memfd; create.offset = 0; create.size = getpagesize()/2; buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf >= 0) { - printf("%s: [FAIL,test-2]\n", TEST_PREFIX); - exit(1); - } + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-2]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-2]\n", TEST_PREFIX); /* should fail (not memfd) */ create.memfd = 0; /* stdin */ create.offset = 0; create.size = size; buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf >= 0) { - printf("%s: [FAIL,test-3]\n", TEST_PREFIX); - exit(1); - } + if (buf >= 0) + ksft_test_result_fail("%s: [FAIL,test-3]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-3]\n", TEST_PREFIX); /* should work */ + page_size = getpagesize(); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); create.memfd = memfd; create.offset = 0; create.size = size; buf = ioctl(devfd, UDMABUF_CREATE, &create); - if (buf < 0) { - printf("%s: [FAIL,test-4]\n", TEST_PREFIX); - exit(1); - } + if (buf < 0) + ksft_test_result_fail("%s: [FAIL,test-4]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-4]\n", TEST_PREFIX); + + munmap(addr1, size); + close(buf); + close(memfd); + + /* should work (migration of 4k size pages)*/ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, false); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-5]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-5]\n", TEST_PREFIX); + + close(buf); + close(memfd); + + /* should work (migration of 2MB size huge pages)*/ + page_size = getpagesize() * 512; /* 2 MB */ + size = MEMFD_SIZE * page_size; + memfd = create_memfd_with_seals(size, true); + addr1 = mmap_fd(memfd, size); + write_to_memfd(addr1, size, 'a'); + buf = create_udmabuf_list(devfd, memfd, size); + addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize()); + write_to_memfd(addr1, size, 'b'); + ret = compare_chunks(addr1, addr2, size); + if (ret < 0) + ksft_test_result_fail("%s: [FAIL,test-6]\n", TEST_PREFIX); + else + ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX); - fprintf(stderr, "%s: ok\n", TEST_PREFIX); close(buf); close(memfd); close(devfd); + + ksft_print_msg("%s: ok\n", TEST_PREFIX); + ksft_print_cnts(); + return 0; } diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 4933d045ab66..c9f2f48fc30f 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -11,6 +11,7 @@ TEST_PROGS = \ hw_stats_l3_gre.sh \ loopback.sh \ pp_alloc_fail.py \ + rss_ctx.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py new file mode 100755 index 000000000000..931dbc36ca43 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import datetime +import random +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx +from lib.py import rand_port +from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure + + +def _rss_key_str(key): + return ":".join(["{:02x}".format(x) for x in key]) + + +def _rss_key_rand(length): + return [random.randint(0, 255) for _ in range(length)] + + +def get_rss(cfg, context=0): + return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] + + +def get_drop_err_sum(cfg): + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + cnt = 0 + for key in ['errors', 'dropped', 'over_errors', 'fifo_errors', + 'length_errors', 'crc_errors', 'missed_errors', + 'frame_errors']: + cnt += stats["stats64"]["rx"][key] + return cnt, stats["stats64"]["tx"]["carrier_changes"] + + +def ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + # ntuple is more of a capability than a config knob, don't bother + # trying to enable it (until some driver actually needs it). + raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + + +# Get Rx packet counts for all queues, as a simple list of integers +# if @prev is specified the prev counts will be subtracted +def _get_rx_cnts(cfg, prev=None): + cfg.wait_hw_stats_settle() + data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True) + data = [x for x in data if x['queue-type'] == "rx"] + max_q = max([x["queue-id"] for x in data]) + queue_stats = [0] * (max_q + 1) + for q in data: + queue_stats[q["queue-id"]] = q["rx-packets"] + if prev and q["queue-id"] < len(prev): + queue_stats[q["queue-id"]] -= prev[q["queue-id"]] + return queue_stats + + +def _send_traffic_check(cfg, port, name, params): + # params is a dict with 3 possible keys: + # - "target": required, which queues we expect to get iperf traffic + # - "empty": optional, which queues should see no traffic at all + # - "noise": optional, which queues we expect to see low traffic; + # used for queues of the main context, since some background + # OS activity may use those queues while we're testing + # the value for each is a list, or some other iterable containing queue ids. + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[i] for i in params['target']) + + ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts)) + if params.get('noise'): + ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2, + "traffic on other queues:" + str(cnts)) + if params.get('empty'): + ksft_eq(sum(cnts[i] for i in params['empty']), 0, + "traffic on inactive queues: " + str(cnts)) + + +def test_rss_key_indir(cfg): + """Test basics like updating the main RSS key and indirection table.""" + + if len(_get_rx_cnts(cfg)) < 2: + KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + + data = get_rss(cfg) + want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] + for k in want_keys: + if k not in data: + raise KsftFailEx("ethtool results missing key: " + k) + if not data[k]: + raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + + key_len = len(data['rss-hash-key']) + + # Set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + + data = get_rss(cfg) + ksft_eq(key, data['rss-hash-key']) + + # Set the indirection table + ethtool(f"-X {cfg.ifname} equal 2") + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + # Check we only get traffic on the first 2 queues + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + # 2 queues, 20k packets, must be at least 5k per queue + ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts)) + # The other queues should be unused + ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts)) + + # Restore, and check traffic gets spread again + reset_indir.exec() + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + + +def test_rss_queue_reconfigure(cfg, main_ctx=True): + """Make sure queue changes can't override requested RSS config. + + By default main RSS table should change to include all queues. + When user sets a specific RSS config the driver should preserve it, + even when queue count changes. Driver should refuse to deactivate + queues used in the user-set RSS config. + """ + + if not main_ctx: + require_ntuple(cfg) + + # Start with 4 queues, an arbitrary known number. + try: + qcnt = len(_get_rx_cnts(cfg)) + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test or qstat not supported") + + if main_ctx: + ctx_id = 0 + ctx_ref = "" + else: + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_ref = f"context {ctx_id}" + defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete") + + # Indirection table should be distributing to all queues. + data = get_rss(cfg, context=ctx_id) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(3, max(data['rss-indirection-table'])) + + # Increase queues, indirection table should be distributing to all queues. + # It's unclear whether tables of additional contexts should be reset, too. + if main_ctx: + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(4, max(data['rss-indirection-table'])) + ethtool(f"-L {cfg.ifname} combined 4") + + # Configure the table explicitly + port = rand_port() + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1") + if main_ctx: + other_key = 'empty' + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_key = 'noise' + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2) }) + + # We should be able to increase queues, but table should be left untouched + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg, context=ctx_id) + ksft_eq({0, 3}, set(data['rss-indirection-table'])) + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2, 4) }) + + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + + +def test_rss_resize(cfg): + """Test resizing of the RSS table. + + Some devices dynamically increase and decrease the size of the RSS + indirection table based on the number of enabled queues. + When that happens driver must maintain the balance of entries + (preferably duplicating the smaller table). + """ + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels['combined-max'] + qcnt = channels['combined-count'] + + if ch_max < 2: + raise KsftSkipEx(f"Not enough queues for the test: {ch_max}") + + ethtool(f"-L {cfg.ifname} combined 2") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + ethtool(f"-X {cfg.ifname} weight 1 7") + defer(ethtool, f"-X {cfg.ifname} default") + + ethtool(f"-L {cfg.ifname} combined {ch_max}") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + ksft_eq(7, + data['rss-indirection-table'].count(1) / + data['rss-indirection-table'].count(0), + f"Table imbalance after resize: {data['rss-indirection-table']}") + + +def test_hitless_key_update(cfg): + """Test that flows may be rehashed without impacting traffic. + + Some workloads may want to rehash the flows in response to an imbalance. + Most effective way to do that is changing the RSS key. Check that changing + the key does not cause link flaps or traffic disruption. + + Disrupting traffic for key update is not a bug, but makes the key + update unusable for rehashing under load. + """ + data = get_rss(cfg) + key_len = len(data['rss-hash-key']) + + key = _rss_key_rand(key_len) + + tgen = GenerateTraffic(cfg) + try: + errors0, carrier0 = get_drop_err_sum(cfg) + t0 = datetime.datetime.now() + ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + t1 = datetime.datetime.now() + errors1, carrier1 = get_drop_err_sum(cfg) + finally: + tgen.wait_pkts_and_stop(5000) + + ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_eq(errors1 - errors1, 0) + ksft_eq(carrier1 - carrier0, 0) + + +def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): + """ + Test separating traffic into RSS contexts. + The queues will be allocated 2 for each context: + ctx0 ctx1 ctx2 ctx3 + [0 1] [2 3] [4 5] [6 7] ... + """ + + require_ntuple(cfg) + + requested_ctx_cnt = ctx_cnt + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ports = [] + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + want_cfg = f"start {2 + i * 2} equal 2" + create_cfg = want_cfg if create_with_cfg else "" + + try: + ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + except CmdExitFailure: + # try to carry on and skip at the end + if i == 0: + raise + ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") + ctx_cnt = i + break + + if not create_with_cfg: + ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + + # Sanity check the context we just created + data = get_rss(cfg, ctx_id) + ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) + ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + for i in range(ctx_cnt): + _send_traffic_check(cfg, ports[i], f"context {i}", + { 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) }) + + if requested_ctx_cnt != ctx_cnt: + raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") + + +def test_rss_context4(cfg): + test_rss_context(cfg, 4) + + +def test_rss_context32(cfg): + test_rss_context(cfg, 32) + + +def test_rss_context4_create_with_cfg(cfg): + test_rss_context(cfg, 4, create_with_cfg=True) + + +def test_rss_context_queue_reconfigure(cfg): + test_rss_queue_reconfigure(cfg, main_ctx=False) + + +def test_rss_context_out_of_order(cfg, ctx_cnt=4): + """ + Test separating traffic into RSS contexts. + Contexts are removed in semi-random order, and steering re-tested + to make sure removal doesn't break steering to surviving contexts. + Test requires 3 contexts to work. + """ + + require_ntuple(cfg) + + requested_ctx_cnt = ctx_cnt + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ntuple = [] + ctx = [] + ports = [] + + def remove_ctx(idx): + ntuple[idx].exec() + ntuple[idx] = None + ctx[idx].exec() + ctx[idx] = None + + def check_traffic(): + for i in range(ctx_cnt): + if ctx[i]: + expected = { + 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) + } + else: + expected = { + 'target': (0, 1), + 'empty': range(2, 2+2*ctx_cnt) + } + + _send_traffic_check(cfg, ports[i], f"context {i}", expected) + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") + ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) + + check_traffic() + + # Remove middle context + remove_ctx(ctx_cnt // 2) + check_traffic() + + # Remove first context + remove_ctx(0) + check_traffic() + + # Remove last context + remove_ctx(-1) + check_traffic() + + if requested_ctx_cnt != ctx_cnt: + raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") + + +def test_rss_context_overlap(cfg, other_ctx=0): + """ + Test contexts overlapping with each other. + Use 4 queues for the main context, but only queues 2 and 3 for context 1. + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + if other_ctx == 0: + ethtool(f"-X {cfg.ifname} equal 4") + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_ctx = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4") + defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete") + + ctx_id = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + port = rand_port() + if other_ctx: + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # Test the main context + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + # Now create a rule for context 1 and make sure traffic goes to a subset + if other_ctx: + ntuple.exec() + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[2:4]) + ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts)) + ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + +def test_rss_context_overlap2(cfg): + test_rss_context_overlap(cfg, True) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_key_indir, test_rss_queue_reconfigure, + test_rss_resize, test_hitless_key_update, + test_rss_context, test_rss_context4, test_rss_context32, + test_rss_context_queue_reconfigure, + test_rss_context_overlap, test_rss_context_overlap2, + test_rss_context_out_of_order, test_rss_context4_create_with_cfg], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index edcedd7bffab..a5e800b8f103 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 import os +import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import cmd, ip +from lib.py import cmd, ethtool, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -82,6 +83,8 @@ class NetDrvEpEnv: self.env = _load_env_file(src_path) + self._stats_settle_time = None + # Things we try to destroy self.remote = None # These are for local testing state @@ -222,3 +225,17 @@ class NetDrvEpEnv: if remote: if not self._require_cmd(comm, "remote"): raise KsftSkipEx("Test requires (remote) command: " + comm) + + def wait_hw_stats_settle(self): + """ + Wait for HW stats to become consistent, some devices DMA HW stats + periodically so events won't be reflected until next sync. + Good drivers will tell us via ethtool what their sync period is. + """ + if self._stats_settle_time is None: + data = ethtool("-c " + self.ifname, json=True)[0] + + self._stats_settle_time = 0.025 + \ + data.get('stats-block-usecs', 0) / 1000 / 1000 + + time.sleep(self._stats_settle_time) diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index abdb677bdb1c..d9c10613ae67 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -5,28 +5,45 @@ import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen class GenerateTraffic: - def __init__(self, env): + def __init__(self, env, port=None): env.require_cmd("iperf3", remote=True) self.env = env - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -p {port}", background=True) + if port is None: + port = rand_port() + self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) wait_port_listen(port) time.sleep(0.1) self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up - pkt = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if not self._wait_pkts(pps=1000): + self.stop(verbose=True) + raise Exception("iperf3 traffic did not ramp up") + + def _wait_pkts(self, pkt_cnt=None, pps=None): + """ + Wait until we've seen pkt_cnt or until traffic ramps up to pps. + Only one of pkt_cnt or pss can be specified. + """ + pkt_start = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"] for _ in range(50): time.sleep(0.1) - now = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] - if now - pkt > 1000: - return - pkt = now - self.stop(verbose=True) - raise Exception("iperf3 traffic did not ramp up") + pkt_now = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if pps: + if pkt_now - pkt_start > pps / 10: + return True + pkt_start = pkt_now + elif pkt_cnt: + if pkt_now - pkt_start > pkt_cnt: + return True + return False + + def wait_pkts_and_stop(self, pkt_cnt): + failed = not self._wait_pkts(pkt_cnt=pkt_cnt) + self.stop(verbose=failed) def stop(self, verbose=None): self._iperf_client.process(terminate=True) diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh index 76f1ab4898d9..e1ad623146d7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh @@ -15,6 +15,13 @@ source $lib_dir/mirror_lib.sh source $lib_dir/mirror_gre_lib.sh source $lib_dir/mirror_gre_topo_lib.sh +ALL_TESTS=" + test_keyful + test_soft + test_tos_fixed + test_ttl_inherit +" + setup_keyful() { tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \ @@ -118,15 +125,15 @@ test_span_gre_ttl_inherit() RET=0 ip link set dev $tundev type $type ttl inherit - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type ttl 100 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: no offload on TTL of inherit ($tcflags)" + log_test "$what: no offload on TTL of inherit" } test_span_gre_tos_fixed() @@ -138,61 +145,49 @@ test_span_gre_tos_fixed() RET=0 ip link set dev $tundev type $type tos 0x10 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type tos inherit - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: no offload on a fixed TOS ($tcflags)" + log_test "$what: no offload on a fixed TOS" } test_span_failable() { - local should_fail=$1; shift local tundev=$1; shift local what=$1; shift RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - if ((should_fail)); then - fail_test_span_gre_dir $tundev ingress - else - quick_test_span_gre_dir $tundev ingress - fi + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: should_fail=$should_fail ($tcflags)" + log_test "fail $what" } -test_failable() +test_keyful() { - local should_fail=$1; shift - - test_span_failable $should_fail gt6-key "mirror to keyful gretap" - test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay" + test_span_failable gt6-key "mirror to keyful gretap" } -test_sw() +test_soft() { - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - test_failable 0 - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_failable gt6-soft "mirror to gretap w/ soft underlay" } -test_hw() +test_tos_fixed() { - test_failable 1 - test_span_gre_tos_fixed gt4 gretap "mirror to gretap" test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap" +} + +test_ttl_inherit() +{ test_span_gre_ttl_inherit gt4 gretap "mirror to gretap" test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap" } @@ -202,16 +197,6 @@ trap cleanup EXIT setup_prepare setup_wait -if ! tc_offload_check; then - check_err 1 "Could not test offloaded functionality" - log_test "mlxsw-specific tests for mirror to gretap" - exit -fi - -tcflags="skip_hw" -test_sw - -tcflags="skip_sw" -test_hw +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index e5589e2fca85..d43093310e23 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -79,7 +79,7 @@ mirror_gre_tunnels_create() cat >> $MIRROR_GRE_BATCH_FILE <<-EOF filter add dev $swp1 ingress pref 1000 \ protocol ipv6 \ - flower $tcflags dst_ip $match_dip \ + flower skip_sw dst_ip $match_dip \ action mirred egress mirror dev $tun EOF done @@ -107,7 +107,7 @@ mirror_gre_tunnels_destroy() done } -__mirror_gre_test() +mirror_gre_test() { local count=$1; shift local should_fail=$1; shift @@ -131,20 +131,6 @@ __mirror_gre_test() done } -mirror_gre_test() -{ - local count=$1; shift - local should_fail=$1; shift - - if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then - check_err 1 "Could not test offloaded functionality" - return - fi - - tcflags="skip_sw" - __mirror_gre_test $count $should_fail -} - mirror_gre_setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 31252bc8775e..4994bea5daf8 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test max_group_size_test" + max_erp_entries_test max_group_size_test collision_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -457,7 +457,7 @@ delta_two_masks_one_key_test() { # If 2 keys are the same and only differ in mask in a way that # they belong under the same ERP (second is delta of the first), - # there should be no C-TCAM spill. + # there should be C-TCAM spill. RET=0 @@ -474,8 +474,8 @@ delta_two_masks_one_key_test() tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ action drop" - tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0 - check_err $? "incorrect C-TCAM spill while inserting the second rule" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1 + check_err $? "C-TCAM spill did not happen while inserting the second rule" $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -1087,6 +1087,53 @@ max_group_size_test() log_test "max ACL group size test ($tcflags). max size $max_size" } +collision_test() +{ + # Filters cannot share an eRP if in the common unmasked part (i.e., + # without the delta bits) they have the same values. If the driver does + # not prevent such configuration (by spilling into the C-TCAM), then + # multiple entries will be present in the device with the same key, + # leading to collisions and a reduced scale. + # + # Create such a scenario and make sure all the filters are successfully + # added. + + RET=0 + + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all + # have the same values in the common unmasked part (dst_ip/24). + + tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \ + flower $tcflags dst_ip 198.51.100.0/24 \ + action drop + + for i in {0..255}; do + tc filter add dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) \ + flower $tcflags dst_ip 198.51.100.${i}/32 \ + action drop + ret=$? + [[ $ret -ne 0 ]] && break + done + + check_err $ret "failed to add all the filters" + + for i in {255..0}; do + tc filter del dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) flower + done + + tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower + + log_test "collision test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config index f35de0542b60..bcf7555eaffe 100644 --- a/tools/testing/selftests/drivers/net/virtio_net/config +++ b/tools/testing/selftests/drivers/net/virtio_net/config @@ -1,2 +1,8 @@ -CONFIG_VIRTIO_NET=y +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=m CONFIG_VIRTIO_DEBUG=y +CONFIG_VIRTIO_NET=y diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile new file mode 100644 index 000000000000..03d0449d307c --- /dev/null +++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for ifs(In Field Scan) selftests + +TEST_PROGS := test_ifs.sh + +include ../../../../../lib.mk diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh new file mode 100755 index 000000000000..8b68964b29f4 --- /dev/null +++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh @@ -0,0 +1,494 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test the functionality of the Intel IFS(In Field Scan) driver. +# + +# Matched with kselftest framework: tools/testing/selftests/kselftest.h +readonly KSFT_PASS=0 +readonly KSFT_FAIL=1 +readonly KSFT_XFAIL=2 +readonly KSFT_SKIP=4 + +readonly CPU_SYSFS="/sys/devices/system/cpu" +readonly CPU_OFFLINE_SYSFS="${CPU_SYSFS}/offline" +readonly IMG_PATH="/lib/firmware/intel/ifs_0" +readonly IFS_SCAN_MODE="0" +readonly IFS_ARRAY_BIST_SCAN_MODE="1" +readonly IFS_PATH="/sys/devices/virtual/misc/intel_ifs" +readonly IFS_SCAN_SYSFS_PATH="${IFS_PATH}_${IFS_SCAN_MODE}" +readonly IFS_ARRAY_BIST_SYSFS_PATH="${IFS_PATH}_${IFS_ARRAY_BIST_SCAN_MODE}" +readonly RUN_TEST="run_test" +readonly STATUS="status" +readonly DETAILS="details" +readonly STATUS_PASS="pass" +readonly PASS="PASS" +readonly FAIL="FAIL" +readonly INFO="INFO" +readonly XFAIL="XFAIL" +readonly SKIP="SKIP" +readonly IFS_NAME="intel_ifs" +readonly ALL="all" +readonly SIBLINGS="siblings" + +# Matches arch/x86/include/asm/intel-family.h and +# drivers/platform/x86/intel/ifs/core.c requirement as follows +readonly SAPPHIRERAPIDS_X="8f" +readonly EMERALDRAPIDS_X="cf" + +readonly INTEL_FAM6="06" + +LOOP_TIMES=3 +FML="" +MODEL="" +STEPPING="" +CPU_FMS="" +TRUE="true" +FALSE="false" +RESULT=$KSFT_PASS +IMAGE_NAME="" +INTERVAL_TIME=1 +OFFLINE_CPUS="" +# For IFS cleanup tags +ORIGIN_IFS_LOADED="" +IFS_IMAGE_NEED_RESTORE=$FALSE +IFS_LOG="/tmp/ifs_logs.$$" +RANDOM_CPU="" +DEFAULT_IMG_ID="" + +append_log() +{ + echo -e "$1" | tee -a "$IFS_LOG" +} + +online_offline_cpu_list() +{ + local on_off=$1 + local target_cpus=$2 + local cpu="" + local cpu_start="" + local cpu_end="" + local i="" + + if [[ -n "$target_cpus" ]]; then + for cpu in $(echo "$target_cpus" | tr ',' ' '); do + if [[ "$cpu" == *"-"* ]]; then + cpu_start="" + cpu_end="" + i="" + cpu_start=$(echo "$cpu" | cut -d "-" -f 1) + cpu_end=$(echo "$cpu" | cut -d "-" -f 2) + for((i=cpu_start;i<=cpu_end;i++)); do + append_log "[$INFO] echo $on_off > \ +${CPU_SYSFS}/cpu${i}/online" + echo "$on_off" > "$CPU_SYSFS"/cpu"$i"/online + done + else + set_target_cpu "$on_off" "$cpu" + fi + done + fi +} + +ifs_scan_result_summary() +{ + local failed_info pass_num skip_num fail_num + + if [[ -e "$IFS_LOG" ]]; then + failed_info=$(grep ^"\[${FAIL}\]" "$IFS_LOG") + fail_num=$(grep -c ^"\[${FAIL}\]" "$IFS_LOG") + skip_num=$(grep -c ^"\[${SKIP}\]" "$IFS_LOG") + pass_num=$(grep -c ^"\[${PASS}\]" "$IFS_LOG") + + if [[ "$fail_num" -ne 0 ]]; then + RESULT=$KSFT_FAIL + echo "[$INFO] IFS test failure summary:" + echo "$failed_info" + elif [[ "$skip_num" -ne 0 ]]; then + RESULT=$KSFT_SKIP + fi + echo "[$INFO] IFS test pass:$pass_num, skip:$skip_num, fail:$fail_num" + else + echo "[$INFO] No file $IFS_LOG for IFS scan summary" + fi +} + +ifs_cleanup() +{ + echo "[$INFO] Restore environment after IFS test" + + # Restore ifs origin image if origin image backup step is needed + [[ "$IFS_IMAGE_NEED_RESTORE" == "$TRUE" ]] && { + mv -f "$IMG_PATH"/"$IMAGE_NAME"_origin "$IMG_PATH"/"$IMAGE_NAME" + } + + # Restore the CPUs to the state before testing + [[ -z "$OFFLINE_CPUS" ]] || online_offline_cpu_list "0" "$OFFLINE_CPUS" + + lsmod | grep -q "$IFS_NAME" && [[ "$ORIGIN_IFS_LOADED" == "$FALSE" ]] && { + echo "[$INFO] modprobe -r $IFS_NAME" + modprobe -r "$IFS_NAME" + } + + ifs_scan_result_summary + [[ -e "$IFS_LOG" ]] && rm -rf "$IFS_LOG" + + echo "[RESULT] IFS test exit with $RESULT" + exit "$RESULT" +} + +do_cmd() +{ + local cmd=$* + local ret="" + + append_log "[$INFO] $cmd" + eval "$cmd" + ret=$? + if [[ $ret -ne 0 ]]; then + append_log "[$FAIL] $cmd failed. Return code is $ret" + RESULT=$KSFT_XFAIL + ifs_cleanup + fi +} + +test_exit() +{ + local info=$1 + RESULT=$2 + + declare -A EXIT_MAP + EXIT_MAP[$KSFT_PASS]=$PASS + EXIT_MAP[$KSFT_FAIL]=$FAIL + EXIT_MAP[$KSFT_XFAIL]=$XFAIL + EXIT_MAP[$KSFT_SKIP]=$SKIP + + append_log "[${EXIT_MAP[$RESULT]}] $info" + ifs_cleanup +} + +online_all_cpus() +{ + local off_cpus="" + + OFFLINE_CPUS=$(cat "$CPU_OFFLINE_SYSFS") + online_offline_cpu_list "1" "$OFFLINE_CPUS" + + off_cpus=$(cat "$CPU_OFFLINE_SYSFS") + if [[ -z "$off_cpus" ]]; then + append_log "[$INFO] All CPUs are online." + else + append_log "[$XFAIL] There is offline cpu:$off_cpus after online all cpu!" + RESULT=$KSFT_XFAIL + ifs_cleanup + fi +} + +get_cpu_fms() +{ + FML=$(grep -m 1 "family" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + MODEL=$(grep -m 1 "model" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + STEPPING=$(grep -m 1 "stepping" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + CPU_FMS="${FML}-${MODEL}-${STEPPING}" +} + +check_cpu_ifs_support_interval_time() +{ + get_cpu_fms + + if [[ "$FML" != "$INTEL_FAM6" ]]; then + test_exit "CPU family:$FML does not support IFS" "$KSFT_SKIP" + fi + + # Ucode has time interval requirement for IFS scan on same CPU as follows: + case $MODEL in + "$SAPPHIRERAPIDS_X") + INTERVAL_TIME=180; + ;; + "$EMERALDRAPIDS_X") + INTERVAL_TIME=30; + ;; + *) + # Set default interval time for other platforms + INTERVAL_TIME=1; + append_log "[$INFO] CPU FML:$FML model:0x$MODEL, default: 1s interval time" + ;; + esac +} + +check_ifs_loaded() +{ + local ifs_info="" + + ifs_info=$(lsmod | grep "$IFS_NAME") + if [[ -z "$ifs_info" ]]; then + append_log "[$INFO] modprobe $IFS_NAME" + modprobe "$IFS_NAME" || { + test_exit "Check if CONFIG_INTEL_IFS is set to m or \ +platform doesn't support ifs" "$KSFT_SKIP" + } + ifs_info=$(lsmod | grep "$IFS_NAME") + [[ -n "$ifs_info" ]] || test_exit "No ifs module listed by lsmod" "$KSFT_FAIL" + fi +} + +test_ifs_scan_entry() +{ + local ifs_info="" + + ifs_info=$(lsmod | grep "$IFS_NAME") + + if [[ -z "$ifs_info" ]]; then + ORIGIN_IFS_LOADED="$FALSE" + check_ifs_loaded + else + ORIGIN_IFS_LOADED="$TRUE" + append_log "[$INFO] Module $IFS_NAME is already loaded" + fi + + if [[ -d "$IFS_SCAN_SYSFS_PATH" ]]; then + append_log "[$PASS] IFS sysfs $IFS_SCAN_SYSFS_PATH entry is created\n" + else + test_exit "No sysfs entry in $IFS_SCAN_SYSFS_PATH" "$KSFT_FAIL" + fi +} + +load_image() +{ + local image_id=$1 + local image_info="" + local ret="" + + check_ifs_loaded + if [[ -e "${IMG_PATH}/${IMAGE_NAME}" ]]; then + append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch" + echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null + ret=$? + [[ "$ret" -eq 0 ]] || { + append_log "[$FAIL] Load ifs image $image_id failed with ret:$ret\n" + return "$ret" + } + image_info=$(cat ${IFS_SCAN_SYSFS_PATH}/current_batch) + if [[ "$image_info" == 0x"$image_id" ]]; then + append_log "[$PASS] load IFS current_batch:$image_info" + else + append_log "[$FAIL] current_batch:$image_info is not expected:$image_id" + return "$KSFT_FAIL" + fi + else + append_log "[$FAIL] No IFS image file ${IMG_PATH}/${IMAGE_NAME}"\ + return "$KSFT_FAIL" + fi + return 0 +} + +test_load_origin_ifs_image() +{ + local image_id=$1 + + IMAGE_NAME="${CPU_FMS}-${image_id}.scan" + + load_image "$image_id" || return $? + return 0 +} + +test_load_bad_ifs_image() +{ + local image_id=$1 + + IMAGE_NAME="${CPU_FMS}-${image_id}.scan" + + do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME} ${IMG_PATH}/${IMAGE_NAME}_origin" + + # Set IFS_IMAGE_NEED_RESTORE to true before corrupt the origin ifs image file + IFS_IMAGE_NEED_RESTORE=$TRUE + do_cmd "dd if=/dev/urandom of=${IMG_PATH}/${IMAGE_NAME} bs=1K count=6 2>/dev/null" + + # Use the specified judgment for negative testing + append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch" + echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null + ret=$? + if [[ "$ret" -ne 0 ]]; then + append_log "[$PASS] Load invalid ifs image failed with ret:$ret not 0 as expected" + else + append_log "[$FAIL] Load invalid ifs image ret:$ret unexpectedly" + fi + + do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME}_origin ${IMG_PATH}/${IMAGE_NAME}" + IFS_IMAGE_NEED_RESTORE=$FALSE +} + +test_bad_and_origin_ifs_image() +{ + local image_id=$1 + + append_log "[$INFO] Test loading bad and then loading original IFS image:" + test_load_origin_ifs_image "$image_id" || return $? + test_load_bad_ifs_image "$image_id" + # Load origin image again and make sure it's worked + test_load_origin_ifs_image "$image_id" || return $? + append_log "[$INFO] Loading invalid IFS image and then loading initial image passed.\n" +} + +ifs_test_cpu() +{ + local ifs_mode=$1 + local cpu_num=$2 + local image_id status details ret result result_info + + echo "$cpu_num" > "$IFS_PATH"_"$ifs_mode"/"$RUN_TEST" + ret=$? + + status=$(cat "${IFS_PATH}_${ifs_mode}/${STATUS}") + details=$(cat "${IFS_PATH}_${ifs_mode}/${DETAILS}") + + if [[ "$ret" -eq 0 && "$status" == "$STATUS_PASS" ]]; then + result="$PASS" + else + result="$FAIL" + fi + + cpu_num=$(cat "${CPU_SYSFS}/cpu${cpu_num}/topology/thread_siblings_list") + + # There is no image file for IFS ARRAY BIST scan + if [[ -e "${IFS_PATH}_${ifs_mode}/current_batch" ]]; then + image_id=$(cat "${IFS_PATH}_${ifs_mode}/current_batch") + result_info=$(printf "[%s] ifs_%1d cpu(s):%s, current_batch:0x%02x, \ +ret:%2d, status:%s, details:0x%016x" \ + "$result" "$ifs_mode" "$cpu_num" "$image_id" "$ret" \ + "$status" "$details") + else + result_info=$(printf "[%s] ifs_%1d cpu(s):%s, ret:%2d, status:%s, details:0x%016x" \ + "$result" "$ifs_mode" "$cpu_num" "$ret" "$status" "$details") + fi + + append_log "$result_info" +} + +ifs_test_cpus() +{ + local cpus_type=$1 + local ifs_mode=$2 + local image_id=$3 + local cpu_max_num="" + local cpu_num="" + + case "$cpus_type" in + "$ALL") + cpu_max_num=$(($(nproc) - 1)) + cpus=$(seq 0 $cpu_max_num) + ;; + "$SIBLINGS") + cpus=$(cat ${CPU_SYSFS}/cpu*/topology/thread_siblings_list \ + | sed -e 's/,.*//' \ + | sed -e 's/-.*//' \ + | sort -n \ + | uniq) + ;; + *) + test_exit "Invalid cpus_type:$cpus_type" "$KSFT_XFAIL" + ;; + esac + + for cpu_num in $cpus; do + ifs_test_cpu "$ifs_mode" "$cpu_num" + done + + if [[ -z "$image_id" ]]; then + append_log "[$INFO] ifs_$ifs_mode test $cpus_type cpus completed\n" + else + append_log "[$INFO] ifs_$ifs_mode $cpus_type cpus with $CPU_FMS-$image_id.scan \ +completed\n" + fi +} + +test_ifs_same_cpu_loop() +{ + local ifs_mode=$1 + local cpu_num=$2 + local loop_times=$3 + + append_log "[$INFO] Test ifs mode $ifs_mode on CPU:$cpu_num for $loop_times rounds:" + [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]] && { + load_image "$DEFAULT_IMG_ID" || return $? + } + for (( i=1; i<=loop_times; i++ )); do + append_log "[$INFO] Loop iteration: $i in total of $loop_times" + # Only IFS scan needs the interval time + if [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]]; then + do_cmd "sleep $INTERVAL_TIME" + elif [[ "$ifs_mode" == "$IFS_ARRAY_BIST_SCAN_MODE" ]]; then + true + else + test_exit "Invalid ifs_mode:$ifs_mode" "$KSFT_XFAIL" + fi + + ifs_test_cpu "$ifs_mode" "$cpu_num" + done + append_log "[$INFO] $loop_times rounds of ifs_$ifs_mode test on CPU:$cpu_num completed.\n" +} + +test_ifs_scan_available_imgs() +{ + local image_ids="" + local image_id="" + + append_log "[$INFO] Test ifs scan with available images:" + image_ids=$(find "$IMG_PATH" -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \ + 2>/dev/null \ + | sort \ + | awk -F "-" '{print $NF}' \ + | cut -d "." -f 1) + + for image_id in $image_ids; do + load_image "$image_id" || return $? + + ifs_test_cpus "$SIBLINGS" "$IFS_SCAN_MODE" "$image_id" + # IFS scan requires time interval for the scan on the same CPU + do_cmd "sleep $INTERVAL_TIME" + done +} + +prepare_ifs_test_env() +{ + local max_cpu="" + + check_cpu_ifs_support_interval_time + + online_all_cpus + max_cpu=$(($(nproc) - 1)) + RANDOM_CPU=$(shuf -i 0-$max_cpu -n 1) + + DEFAULT_IMG_ID=$(find $IMG_PATH -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \ + 2>/dev/null \ + | sort \ + | head -n 1 \ + | awk -F "-" '{print $NF}' \ + | cut -d "." -f 1) +} + +test_ifs() +{ + prepare_ifs_test_env + + test_ifs_scan_entry + + if [[ -z "$DEFAULT_IMG_ID" ]]; then + append_log "[$SKIP] No proper ${IMG_PATH}/${CPU_FMS}-*.scan, skip ifs_0 scan" + else + test_bad_and_origin_ifs_image "$DEFAULT_IMG_ID" + test_ifs_scan_available_imgs + test_ifs_same_cpu_loop "$IFS_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES" + fi + + if [[ -d "$IFS_ARRAY_BIST_SYSFS_PATH" ]]; then + ifs_test_cpus "$SIBLINGS" "$IFS_ARRAY_BIST_SCAN_MODE" + test_ifs_same_cpu_loop "$IFS_ARRAY_BIST_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES" + else + append_log "[$SKIP] No $IFS_ARRAY_BIST_SYSFS_PATH, skip IFS ARRAY BIST scan" + fi +} + +trap ifs_cleanup SIGTERM SIGINT +test_ifs +ifs_cleanup diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index fb4472ddffd8..ba012bc5aab9 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,10 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS = -Wall CFLAGS += -Wno-nonnull -CFLAGS += -D_GNU_SOURCE + +ALIGNS := 0x1000 0x200000 0x1000000 +ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) +ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS)) +ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES) TEST_PROGS := binfmt_script.py -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular +TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -28,9 +32,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ -$(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ -$(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ -$(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ +$(OUTPUT)/load_address.0x%: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ + -fPIE -pie $< -o $@ +$(OUTPUT)/load_address.static.0x%: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ + -fPIE -static-pie $< -o $@ diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c index 17e3207d34ae..8257fddba8c8 100644 --- a/tools/testing/selftests/exec/load_address.c +++ b/tools/testing/selftests/exec/load_address.c @@ -5,11 +5,13 @@ #include <link.h> #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #include "../kselftest.h" struct Statistics { unsigned long long load_address; unsigned long long alignment; + bool interp; }; int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) @@ -26,11 +28,20 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) stats->alignment = 0; for (i = 0; i < info->dlpi_phnum; i++) { + unsigned long long align; + + if (info->dlpi_phdr[i].p_type == PT_INTERP) { + stats->interp = true; + continue; + } + if (info->dlpi_phdr[i].p_type != PT_LOAD) continue; - if (info->dlpi_phdr[i].p_align > stats->alignment) - stats->alignment = info->dlpi_phdr[i].p_align; + align = info->dlpi_phdr[i].p_align; + + if (align > stats->alignment) + stats->alignment = align; } return 1; // Terminate dl_iterate_phdr. @@ -38,27 +49,57 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) int main(int argc, char **argv) { - struct Statistics extracted; - unsigned long long misalign; + struct Statistics extracted = { }; + unsigned long long misalign, pow2; + bool interp_needed; + char buf[1024]; + FILE *maps; int ret; ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(4); + + /* Dump maps file for debugging reference. */ + maps = fopen("/proc/self/maps", "r"); + if (!maps) + ksft_exit_fail_msg("FAILED: /proc/self/maps: %s\n", strerror(errno)); + while (fgets(buf, sizeof(buf), maps)) { + ksft_print_msg("%s", buf); + } + fclose(maps); + /* Walk the program headers. */ ret = dl_iterate_phdr(ExtractStatistics, &extracted); if (ret != 1) ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n"); - if (extracted.alignment == 0) - ksft_exit_fail_msg("FAILED: No alignment found\n"); - else if (extracted.alignment & (extracted.alignment - 1)) - ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n"); + /* Report our findings. */ + ksft_print_msg("load_address=%#llx alignment=%#llx\n", + extracted.load_address, extracted.alignment); + + /* If we're named with ".static." we expect no INTERP. */ + interp_needed = strstr(argv[0], ".static.") == NULL; + + /* Were we built as expected? */ + ksft_test_result(interp_needed == extracted.interp, + "%s INTERP program header %s\n", + interp_needed ? "Wanted" : "Unwanted", + extracted.interp ? "seen" : "missing"); + + /* Did we find an alignment? */ + ksft_test_result(extracted.alignment != 0, + "Alignment%s found\n", extracted.alignment ? "" : " NOT"); + + /* Is the alignment sane? */ + pow2 = extracted.alignment & (extracted.alignment - 1); + ksft_test_result(pow2 == 0, + "Alignment is%s a power of 2: %#llx\n", + pow2 == 0 ? "" : " NOT", extracted.alignment); + /* Is the load address aligned? */ misalign = extracted.load_address & (extracted.alignment - 1); - if (misalign) - ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n", - extracted.alignment, extracted.load_address); + ksft_test_result(misalign == 0, "Load Address is %saligned (%#llx)\n", + misalign ? "MIS" : "", misalign); - ksft_test_result_pass("Completed\n"); ksft_finished(); } diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 71ec34bf1501..4373cea79b79 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) + +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + TEST_GEN_PROGS := fchmodat2_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c index f142a137526c..85acb4e3ef00 100644 --- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c +++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c @@ -13,6 +13,8 @@ #include <sys/eventfd.h> #include "../../kselftest_harness.h" +#define EVENTFD_TEST_ITERATIONS 100000UL + struct error { int code; char msg[512]; @@ -40,7 +42,7 @@ static inline int sys_eventfd2(unsigned int count, int flags) return syscall(__NR_eventfd2, count, flags); } -TEST(eventfd01) +TEST(eventfd_check_flag_rdwr) { int fd, flags; @@ -54,7 +56,7 @@ TEST(eventfd01) close(fd); } -TEST(eventfd02) +TEST(eventfd_check_flag_cloexec) { int fd, flags; @@ -68,7 +70,7 @@ TEST(eventfd02) close(fd); } -TEST(eventfd03) +TEST(eventfd_check_flag_nonblock) { int fd, flags; @@ -83,7 +85,7 @@ TEST(eventfd03) close(fd); } -TEST(eventfd04) +TEST(eventfd_chek_flag_cloexec_and_nonblock) { int fd, flags; @@ -161,7 +163,7 @@ static int verify_fdinfo(int fd, struct error *err, const char *prefix, return 0; } -TEST(eventfd05) +TEST(eventfd_check_flag_semaphore) { struct error err = {0}; int fd, ret; @@ -183,4 +185,128 @@ TEST(eventfd05) close(fd); } +/* + * A write(2) fails with the error EINVAL if the size of the supplied buffer + * is less than 8 bytes, or if an attempt is made to write the value + * 0xffffffffffffffff. + */ +TEST(eventfd_check_write) +{ + uint64_t value = 1; + ssize_t size; + int fd; + + fd = sys_eventfd2(0, 0); + ASSERT_GE(fd, 0); + + size = write(fd, &value, sizeof(int)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + + value = (uint64_t)-1; + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + close(fd); +} + +/* + * A read(2) fails with the error EINVAL if the size of the supplied buffer is + * less than 8 bytes. + */ +TEST(eventfd_check_read) +{ + uint64_t value; + ssize_t size; + int fd; + + fd = sys_eventfd2(1, 0); + ASSERT_GE(fd, 0); + + size = read(fd, &value, sizeof(int)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EINVAL); + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + EXPECT_EQ(value, 1); + + close(fd); +} + + +/* + * If EFD_SEMAPHORE was not specified and the eventfd counter has a nonzero + * value, then a read(2) returns 8 bytes containing that value, and the + * counter's value is reset to zero. + * If the eventfd counter is zero at the time of the call to read(2), then the + * call fails with the error EAGAIN if the file descriptor has been made nonblocking. + */ +TEST(eventfd_check_read_with_nonsemaphore) +{ + uint64_t value; + ssize_t size; + int fd; + int i; + + fd = sys_eventfd2(0, EFD_NONBLOCK); + ASSERT_GE(fd, 0); + + value = 1; + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + } + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(uint64_t)); + EXPECT_EQ(value, EVENTFD_TEST_ITERATIONS); + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fd); +} + +/* + * If EFD_SEMAPHORE was specified and the eventfd counter has a nonzero value, + * then a read(2) returns 8 bytes containing the value 1, and the counter's + * value is decremented by 1. + * If the eventfd counter is zero at the time of the call to read(2), then the + * call fails with the error EAGAIN if the file descriptor has been made nonblocking. + */ +TEST(eventfd_check_read_with_semaphore) +{ + uint64_t value; + ssize_t size; + int fd; + int i; + + fd = sys_eventfd2(0, EFD_SEMAPHORE|EFD_NONBLOCK); + ASSERT_GE(fd, 0); + + value = 1; + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = write(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + } + + for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) { + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, sizeof(value)); + EXPECT_EQ(value, 1); + } + + size = read(fd, &value, sizeof(value)); + EXPECT_EQ(size, -1); + EXPECT_EQ(errno, EAGAIN); + + close(fd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 07a0d5b545ca..3af3136e35a4 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := statmount_test +TEST_GEN_PROGS := statmount_test statmount_test_ns include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h new file mode 100644 index 000000000000..f4294bab9d73 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __STATMOUNT_H +#define __STATMOUNT_H + +#include <stdint.h> +#include <linux/mount.h> +#include <asm/unistd.h> + +static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, + struct statmount *buf, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, + uint64_t last_mnt_id, uint64_t list[], size_t num, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_listmount, &req, list, num, flags); +} + +#endif /* __STATMOUNT_H */ diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index e6d7c4f1c85b..c773334bbcc9 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -4,17 +4,15 @@ #include <assert.h> #include <stddef.h> -#include <stdint.h> #include <sched.h> #include <fcntl.h> #include <sys/param.h> #include <sys/mount.h> #include <sys/stat.h> #include <sys/statfs.h> -#include <linux/mount.h> #include <linux/stat.h> -#include <asm/unistd.h> +#include "statmount.h" #include "../../kselftest.h" static const char *const known_fs[] = { @@ -36,18 +34,6 @@ static const char *const known_fs[] = { "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; -static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf, - size_t bufsize, unsigned int flags) -{ - struct mnt_id_req req = { - .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, - .param = mask, - }; - - return syscall(__NR_statmount, &req, buf, bufsize, flags); -} - static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) { size_t bufsize = 1 << 15; @@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne int ret; for (;;) { - ret = statmount(mnt_id, mask, tmp, bufsize, flags); + ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags); if (ret != -1) break; if (tofree) @@ -121,12 +107,20 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; static int orig_root; static uint64_t root_id, parent_id; static uint32_t old_root_id, old_parent_id; - +static FILE *f_mountinfo; static void cleanup_namespace(void) { - fchdir(orig_root); - chroot("."); + int ret; + + ret = fchdir(orig_root); + if (ret == -1) + ksft_perror("fchdir to original root"); + + ret = chroot("."); + if (ret == -1) + ksft_perror("chroot to original root"); + umount2(root_mntpoint, MNT_DETACH); rmdir(root_mntpoint); } @@ -138,7 +132,7 @@ static void setup_namespace(void) uid_t uid = getuid(); gid_t gid = getgid(); - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); if (ret == -1) ksft_exit_fail_msg("unsharing mountns and userns: %s\n", strerror(errno)); @@ -149,6 +143,11 @@ static void setup_namespace(void) sprintf(buf, "0 %d 1", gid); write_file("/proc/self/gid_map", buf); + f_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!f_mountinfo) + ksft_exit_fail_msg("failed to open mountinfo: %s\n", + strerror(errno)); + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); if (ret == -1) ksft_exit_fail_msg("making mount tree private: %s\n", @@ -208,25 +207,13 @@ static int setup_mount_tree(int log2_num) return 0; } -static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id, - uint64_t list[], size_t num, unsigned int flags) -{ - struct mnt_id_req req = { - .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, - .param = last_mnt_id, - }; - - return syscall(__NR_listmount, &req, list, num, flags); -} - static void test_listmount_empty_root(void) { ssize_t res; const unsigned int size = 32; uint64_t list[size]; - res = listmount(LSMT_ROOT, 0, list, size, 0); + res = listmount(LSMT_ROOT, 0, 0, list, size, 0); if (res == -1) { ksft_test_result_fail("listmount: %s\n", strerror(errno)); return; @@ -251,7 +238,7 @@ static void test_statmount_zero_mask(void) struct statmount sm; int ret; - ret = statmount(root_id, 0, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount zero mask: %s\n", strerror(errno)); @@ -277,7 +264,7 @@ static void test_statmount_mnt_basic(void) int ret; uint64_t mask = STATMOUNT_MNT_BASIC; - ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount mnt basic: %s\n", strerror(errno)); @@ -337,7 +324,7 @@ static void test_statmount_sb_basic(void) struct statx sx; struct statfs sf; - ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount sb basic: %s\n", strerror(errno)); @@ -462,6 +449,88 @@ static void test_statmount_fs_type(void) free(sm); } +static void test_statmount_mnt_opts(void) +{ + struct statmount *sm; + const char *statmount_opts; + char *line = NULL; + size_t len = 0; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, + 0); + if (!sm) { + ksft_test_result_fail("statmount mnt opts: %s\n", + strerror(errno)); + return; + } + + while (getline(&line, &len, f_mountinfo) != -1) { + int i; + char *p, *p2; + unsigned int old_mnt_id; + + old_mnt_id = atoi(line); + if (old_mnt_id != sm->mnt_id_old) + continue; + + for (p = line, i = 0; p && i < 5; i++) + p = strchr(p + 1, ' '); + if (!p) + continue; + + p2 = strchr(p + 1, ' '); + if (!p2) + continue; + *p2 = '\0'; + p = strchr(p2 + 1, '-'); + if (!p) + continue; + for (p++, i = 0; p && i < 2; i++) + p = strchr(p + 1, ' '); + if (!p) + continue; + p++; + + /* skip generic superblock options */ + if (strncmp(p, "ro", 2) == 0) + p += 2; + else if (strncmp(p, "rw", 2) == 0) + p += 2; + if (*p == ',') + p++; + if (strncmp(p, "sync", 4) == 0) + p += 4; + if (*p == ',') + p++; + if (strncmp(p, "dirsync", 7) == 0) + p += 7; + if (*p == ',') + p++; + if (strncmp(p, "lazytime", 8) == 0) + p += 8; + if (*p == ',') + p++; + p2 = strrchr(p, '\n'); + if (p2) + *p2 = '\0'; + + statmount_opts = sm->str + sm->mnt_opts; + if (strcmp(statmount_opts, p) != 0) + ksft_test_result_fail( + "unexpected mount options: '%s' != '%s'\n", + statmount_opts, p); + else + ksft_test_result_pass("statmount mount options\n"); + free(sm); + free(line); + return; + } + + ksft_test_result_fail("didnt't find mount entry\n"); + free(sm); + free(line); +} + static void test_statmount_string(uint64_t mask, size_t off, const char *name) { struct statmount *sm; @@ -498,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) exactsize = sm->size; shortsize = sizeof(*sm) + i; - ret = statmount(root_id, mask, sm, exactsize, 0); + ret = statmount(root_id, 0, mask, sm, exactsize, 0); if (ret == -1) { ksft_test_result_fail("statmount exact size: %s\n", strerror(errno)); goto out; } errno = 0; - ret = statmount(root_id, mask, sm, shortsize, 0); + ret = statmount(root_id, 0, mask, sm, shortsize, 0); if (ret != -1 || errno != EOVERFLOW) { ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", strerror(errno)); @@ -533,7 +602,7 @@ static void test_listmount_tree(void) if (res == -1) return; - num = res = listmount(LSMT_ROOT, 0, list, size, 0); + num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0); if (res == -1) { ksft_test_result_fail("listmount: %s\n", strerror(errno)); return; @@ -545,7 +614,7 @@ static void test_listmount_tree(void) } for (i = 0; i < size - step;) { - res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); + res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0); if (res == -1) ksft_test_result_fail("short listmount: %s\n", strerror(errno)); @@ -577,18 +646,18 @@ int main(void) int ret; uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | - STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID; ksft_print_header(); - ret = statmount(0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); setup_namespace(); - ksft_set_plan(14); + ksft_set_plan(15); test_listmount_empty_root(); test_statmount_zero_mask(); test_statmount_mnt_basic(); @@ -596,6 +665,7 @@ int main(void) test_statmount_mnt_root(); test_statmount_mnt_point(); test_statmount_fs_type(); + test_statmount_mnt_opts(); test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root"); test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point"); test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type"); diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c new file mode 100644 index 000000000000..e044f5fc57fd --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE + +#include <assert.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <linux/nsfs.h> +#include <linux/stat.h> + +#include "statmount.h" +#include "../../kselftest.h" + +#define NSID_PASS 0 +#define NSID_FAIL 1 +#define NSID_SKIP 2 +#define NSID_ERROR 3 + +static void handle_result(int ret, const char *testname) +{ + if (ret == NSID_PASS) + ksft_test_result_pass("%s\n", testname); + else if (ret == NSID_FAIL) + ksft_test_result_fail("%s\n", testname); + else if (ret == NSID_ERROR) + ksft_exit_fail_msg("%s\n", testname); + else + ksft_test_result_skip("%s\n", testname); +} + +static inline int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); + return -1; + } + + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); + return -1; + } + + ret = WEXITSTATUS(status); + return ret; +} + +static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) +{ + int fd = open(mnt_ns, O_RDONLY); + + if (fd < 0) { + ksft_print_msg("failed to open for ns %s: %s\n", + mnt_ns, strerror(errno)); + sleep(60); + return NSID_ERROR; + } + + if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) { + ksft_print_msg("failed to get the nsid for ns %s: %s\n", + mnt_ns, strerror(errno)); + return NSID_ERROR; + } + close(fd); + return NSID_PASS; +} + +static int get_mnt_id(const char *path, uint64_t *mnt_id) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return NSID_ERROR; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", path); + return NSID_ERROR; + } + + *mnt_id = sx.stx_mnt_id; + return NSID_PASS; +} + +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return NSID_ERROR; + } + + ret = write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return NSID_ERROR; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + return NSID_ERROR; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return NSID_ERROR; + } + + return NSID_PASS; +} + +static int setup_namespace(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); + if (ret == -1) + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret != NSID_PASS) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret != NSID_PASS) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret != NSID_PASS) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret == -1) { + ksft_print_msg("making mount tree private: %s\n", + strerror(errno)); + return NSID_ERROR; + } + + return NSID_PASS; +} + +static int _test_statmount_mnt_ns_id(void) +{ + struct statmount sm; + uint64_t mnt_ns_id; + uint64_t root_id; + int ret; + + ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id); + if (ret != NSID_PASS) + return ret; + + ret = get_mnt_id("/", &root_id); + if (ret != NSID_PASS) + return ret; + + ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + return NSID_FAIL; + } + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; + } + + return NSID_PASS; +} + +static void test_statmount_mnt_ns_id(void) +{ + pid_t pid; + int ret; + + pid = fork(); + if (pid < 0) + ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno)); + + /* We're the original pid, wait for the result. */ + if (pid != 0) { + ret = wait_for_pid(pid); + handle_result(ret, "test statmount ns id"); + return; + } + + ret = setup_namespace(); + if (ret != NSID_PASS) + exit(ret); + ret = _test_statmount_mnt_ns_id(); + exit(ret); +} + +static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts) +{ + uint64_t list[256]; + uint64_t mnt_ns_id; + uint64_t nr_mounts; + char buf[256]; + int ret; + + /* Get the mount ns id for our child. */ + snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid); + ret = get_mnt_ns_id(buf, &mnt_ns_id); + + nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0); + if (nr_mounts == (uint64_t)-1) { + ksft_print_msg("listmount: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (nr_mounts != child_nr_mounts) { + ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts, + child_nr_mounts); + return NSID_FAIL; + } + + /* Validate that all of our entries match our mnt_ns_id. */ + for (int i = 0; i < nr_mounts; i++) { + struct statmount sm; + + ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm, + sizeof(sm), 0); + if (ret < 0) { + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; + } + } + + return NSID_PASS; +} + +static void test_listmount_ns(void) +{ + uint64_t nr_mounts; + char pval; + int child_ready_pipe[2]; + int parent_ready_pipe[2]; + pid_t pid; + int ret, child_ret; + + if (pipe(child_ready_pipe) < 0) + ksft_exit_fail_msg("failed to create the child pipe: %s\n", + strerror(errno)); + if (pipe(parent_ready_pipe) < 0) + ksft_exit_fail_msg("failed to create the parent pipe: %s\n", + strerror(errno)); + + pid = fork(); + if (pid < 0) + ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno)); + + if (pid == 0) { + char cval; + uint64_t list[256]; + + close(child_ready_pipe[0]); + close(parent_ready_pipe[1]); + + ret = setup_namespace(); + if (ret != NSID_PASS) + exit(ret); + + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0); + if (nr_mounts == (uint64_t)-1) { + ksft_print_msg("listmount: %s\n", strerror(errno)); + exit(NSID_FAIL); + } + + /* + * Tell our parent how many mounts we have, and then wait for it + * to tell us we're done. + */ + write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)); + read(parent_ready_pipe[0], &cval, sizeof(cval)); + exit(NSID_PASS); + } + + close(child_ready_pipe[1]); + close(parent_ready_pipe[0]); + + /* Wait until the child has created everything. */ + if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) != + sizeof(nr_mounts)) + ret = NSID_ERROR; + + ret = validate_external_listmount(pid, nr_mounts); + + if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval)) + ret = NSID_ERROR; + + child_ret = wait_for_pid(pid); + if (child_ret != NSID_PASS) + ret = child_ret; + handle_result(ret, "test listmount ns id"); +} + +int main(void) +{ + int ret; + + ksft_print_header(); + ret = statmount(0, 0, 0, NULL, 0, 0); + assert(ret == -1); + if (errno == ENOSYS) + ksft_exit_skip("statmount() syscall not supported\n"); + + ksft_set_plan(2); + test_statmount_mnt_ns_id(); + test_listmount_ns(); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc new file mode 100644 index 000000000000..ff88f97e41fb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc @@ -0,0 +1,103 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" +INSTANCE3="instances/test3_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi + if [ -d $INSTANCE3 ]; then + echo nop > $INSTANCE3/current_tracer + rmdir $INSTANCE3 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 +mkdir $INSTANCE3 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +do_test() { + REGEX=$1 + TEST=$2 + + # filter something, schedule is always good + if ! echo "$REGEX" > set_ftrace_filter; then + fail "can not enable filter $REGEX" + fi + + echo > trace + echo function_graph > current_tracer + enable_tracing + sleep 1 + # search for functions (has "{" or ";" on the line) + echo 0 > tracing_on + count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep -v "$TEST" | wc -l` + echo 1 > tracing_on + if [ $count -ne 0 ]; then + fail "Graph filtering not working by itself against $TEST?" + fi + + # Make sure we did find something + echo 0 > tracing_on + count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l` + echo 1 > tracing_on + if [ $count -eq 0 ]; then + fail "No traces found with $TEST?" + fi +} + +do_test '*sched*' 'sched' +cd $INSTANCE1 +do_test '*lock*' 'lock' +cd $WD +cd $INSTANCE2 +do_test '*rcu*' 'rcu' +cd $WD +cd $INSTANCE3 +echo function_graph > current_tracer + +sleep 1 +count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l` +if [ $count -eq 0 ]; then + fail "No traces found with all tracing?" +fi + +cd $WD +echo nop > current_tracer +echo nop > $INSTANCE1/current_tracer +echo nop > $INSTANCE2/current_tracer +echo nop > $INSTANCE3/current_tracer + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 2f7211254529..8dcce001881d 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -8,12 +8,18 @@ # Also test it on an instance directory do_function_fork=1 +do_funcgraph_proc=1 if [ ! -f options/function-fork ]; then do_function_fork=0 echo "no option for function-fork found. Option will not be tested." fi +if [ ! -f options/funcgraph-proc ]; then + do_funcgraph_proc=0 + echo "no option for function-fork found. Option will not be tested." +fi + read PID _ < /proc/self/stat if [ $do_function_fork -eq 1 ]; then @@ -21,12 +27,19 @@ if [ $do_function_fork -eq 1 ]; then orig_value=`grep function-fork trace_options` fi +if [ $do_funcgraph_proc -eq 1 ]; then + orig_value2=`cat options/funcgraph-proc` + echo 1 > options/funcgraph-proc +fi + do_reset() { - if [ $do_function_fork -eq 0 ]; then - return + if [ $do_function_fork -eq 1 ]; then + echo $orig_value > trace_options fi - echo $orig_value > trace_options + if [ $do_funcgraph_proc -eq 1 ]; then + echo $orig_value2 > options/funcgraph-proc + fi } fail() { # msg @@ -36,13 +49,15 @@ fail() { # msg } do_test() { + TRACER=$1 + disable_tracing echo do_execve* > set_ftrace_filter echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid - echo function > current_tracer + echo $TRACER > current_tracer if [ $do_function_fork -eq 1 ]; then # don't allow children to be traced @@ -82,7 +97,11 @@ do_test() { fi } -do_test +do_test function +if grep -s function_graph available_tracers; then + do_test function_graph +fi + do_reset exit 0 diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 994fa3468f17..f79f9bac7918 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) -CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE= -pthread $(INCLUDES) $(KHDR_INCLUDES) +CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) LDLIBS := -lpthread -lrt LOCAL_HDRS := \ diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index f825623e3edc..75b7b4ef6cfa 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -460,7 +460,7 @@ FIXTURE(hid_bpf) { int hid_id; pthread_t tid; struct hid *skel; - int hid_links[3]; /* max number of programs loaded in a single test */ + struct bpf_link *hid_links[3]; /* max number of programs loaded in a single test */ }; static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) { @@ -470,9 +470,14 @@ static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) close(self->hidraw_fd); self->hidraw_fd = 0; + if (!self->skel) + return; + + hid__detach(self->skel); + for (i = 0; i < ARRAY_SIZE(self->hid_links); i++) { if (self->hid_links[i]) - close(self->hid_links[i]); + bpf_link__destroy(self->hid_links[i]); } hid__destroy(self->skel); @@ -527,14 +532,8 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { - int attach_fd, err = -EINVAL; - struct attach_prog_args args = { - .retval = -1, - }; - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr, - .ctx_in = &args, - .ctx_size_in = sizeof(args), - ); + struct bpf_map *iter_map; + int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) TH_LOG("too many programs are to be loaded"); @@ -545,37 +544,52 @@ static void load_programs(const struct test_program programs[], for (int i = 0; i < progs_count; i++) { struct bpf_program *prog; + struct bpf_map *map; + int *ops_hid_id; prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj, programs[i].name); ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name); bpf_program__set_autoload(prog, true); + + map = bpf_object__find_map_by_name(*self->skel->skeleton->obj, + programs[i].name + 4); + ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'", + programs[i].name + 4); + + /* hid_id is the first field of struct hid_bpf_ops */ + ops_hid_id = bpf_map__initial_value(map, NULL); + ASSERT_OK_PTR(ops_hid_id) TH_LOG("unable to retrieve struct_ops data"); + + *ops_hid_id = self->hid_id; } + /* we disable the auto-attach feature of all maps because we + * only want the tested one to be manually attached in the next + * call to bpf_map__attach_struct_ops() + */ + bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj) + bpf_map__set_autoattach(iter_map, false); + err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); - attach_fd = bpf_program__fd(self->skel->progs.attach_prog); - ASSERT_GE(attach_fd, 0) TH_LOG("locate attach_prog: %d", attach_fd); - for (int i = 0; i < progs_count; i++) { - struct bpf_program *prog; + struct bpf_map *map; - prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj, - programs[i].name); - ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name); - - args.prog_fd = bpf_program__fd(prog); - args.hid = self->hid_id; - args.insert_head = programs[i].insert_head; - err = bpf_prog_test_run_opts(attach_fd, &tattr); - ASSERT_GE(args.retval, 0) - TH_LOG("attach_hid(%s): %d", programs[i].name, args.retval); + map = bpf_object__find_map_by_name(*self->skel->skeleton->obj, + programs[i].name + 4); + ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'", + programs[i].name + 4); - self->hid_links[i] = args.retval; + self->hid_links[i] = bpf_map__attach_struct_ops(map); + ASSERT_OK_PTR(self->hid_links[i]) TH_LOG("failed to attach struct ops '%s'", + programs[i].name + 4); } + hid__attach(self->skel); + self->hidraw_fd = open_hidraw(self->dev_id); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); } @@ -640,6 +654,65 @@ TEST_F(hid_bpf, raw_event) } /* + * Attach hid_first_event to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can change the data + */ +TEST_F(hid_bpf, subprog_raw_event) +{ + const struct test_program progs[] = { + { .name = "hid_subprog_first_event" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[2], 47); + + /* inject another event */ + memset(buf, 0, sizeof(buf)); + buf[0] = 1; + buf[1] = 47; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[2], 52); +} + +/* + * Attach hid_first_event to the given uhid device, + * attempt at re-attaching it, we should not lock and + * return an invalid struct bpf_link + */ +TEST_F(hid_bpf, multiple_attach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + struct bpf_link *link; + + LOAD_PROGRAMS(progs); + + link = bpf_map__attach_struct_ops(self->skel->maps.first_event); + ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops"); +} + +/* * Ensures that we can attach/detach programs */ TEST_F(hid_bpf, test_attach_detach) @@ -648,13 +721,17 @@ TEST_F(hid_bpf, test_attach_detach) { .name = "hid_first_event" }, { .name = "hid_second_event" }, }; + struct bpf_link *link; __u8 buf[10] = {0}; - int err, link; + int err, link_fd; LOAD_PROGRAMS(progs); link = self->hid_links[0]; - ASSERT_GT(link, 0) TH_LOG("HID-BPF link not created"); + ASSERT_OK_PTR(link) TH_LOG("HID-BPF link not created"); + + link_fd = bpf_link__fd(link); + ASSERT_GE(link_fd, 0) TH_LOG("HID-BPF link FD not valid"); /* inject one event */ buf[0] = 1; @@ -673,7 +750,7 @@ TEST_F(hid_bpf, test_attach_detach) /* pin the first program and immediately unpin it */ #define PIN_PATH "/sys/fs/bpf/hid_first_event" - err = bpf_obj_pin(link, PIN_PATH); + err = bpf_obj_pin(link_fd, PIN_PATH); ASSERT_OK(err) TH_LOG("error while calling bpf_obj_pin"); remove(PIN_PATH); #undef PIN_PATH @@ -876,6 +953,325 @@ TEST_F(hid_bpf, test_hid_user_raw_request_call) } /* + * Call hid_hw_raw_request against the given uhid device, + * check that the program is called and prevents the + * call to uhid. + */ +TEST_F(hid_bpf, test_hid_filter_raw_request_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_filter_raw_request" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* first check that we did not attach to device_event */ + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); + ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test"); + + /* now check that our program is preventing hid_hw_raw_request() */ + + /* emit hid_hw_raw_request from hidraw */ + /* Get Feature */ + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; /* Report Number */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_LT(err, 0) TH_LOG("unexpected success while reading HIDIOCGFEATURE: %d", err); + ASSERT_EQ(errno, 20) TH_LOG("unexpected error code while reading HIDIOCGFEATURE: %d", + errno); + + /* remove our bpf program and check that we can now emit commands */ + + /* detach the program */ + detach_bpf(self); + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); + + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGFEATURE: %d", err); +} + +/* + * Call hid_hw_raw_request against the given uhid device, + * check that the program is called and can issue the call + * to uhid and transform the answer. + */ +TEST_F(hid_bpf, test_hid_change_raw_request_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_hidraw_raw_request" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_raw_request from hidraw */ + /* Get Feature */ + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; /* Report Number */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err); + + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 3); + ASSERT_EQ(buf[2], 4); +} + +/* + * Call hid_hw_raw_request against the given uhid device, + * check that the program is not making infinite loops. + */ +TEST_F(hid_bpf, test_hid_infinite_loop_raw_request_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_infinite_loop_raw_request" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_raw_request from hidraw */ + /* Get Feature */ + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; /* Report Number */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err); +} + +/* + * Call hid_hw_output_report against the given uhid device, + * check that the program is called and prevents the + * call to uhid. + */ +TEST_F(hid_bpf, test_hid_filter_output_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_filter_output_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* first check that we did not attach to device_event */ + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); + ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test"); + + /* now check that our program is preventing hid_hw_output_report() */ + + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + err = write(self->hidraw_fd, buf, 3); + ASSERT_LT(err, 0) TH_LOG("unexpected success while sending hid_hw_output_report: %d", err); + ASSERT_EQ(errno, 25) TH_LOG("unexpected error code while sending hid_hw_output_report: %d", + errno); + + /* remove our bpf program and check that we can now emit commands */ + + /* detach the program */ + detach_bpf(self); + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); + + err = write(self->hidraw_fd, buf, 3); + ASSERT_GE(err, 0) TH_LOG("error while sending hid_hw_output_report: %d", err); +} + +/* + * Call hid_hw_output_report against the given uhid device, + * check that the program is called and can issue the call + * to uhid and transform the answer. + */ +TEST_F(hid_bpf, test_hid_change_output_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_hidraw_output_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_output_report from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + err = write(self->hidraw_fd, buf, 10); + ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d", + err); +} + +/* + * Call hid_hw_output_report against the given uhid device, + * check that the program is not making infinite loops. + */ +TEST_F(hid_bpf, test_hid_infinite_loop_output_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_infinite_loop_output_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_output_report from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + err = write(self->hidraw_fd, buf, 8); + ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d", + err); +} + +/* + * Attach hid_multiply_event_wq to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can add extra data + */ +TEST_F(hid_bpf, test_multiply_events_wq) +{ + const struct test_program progs[] = { + { .name = "hid_test_multiply_events_wq" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 47); + + usleep(100000); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 3); +} + +/* + * Attach hid_multiply_event to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can add extra data + */ +TEST_F(hid_bpf, test_multiply_events) +{ + const struct test_program progs[] = { + { .name = "hid_test_multiply_events" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 47); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 52); +} + +/* + * Call hid_bpf_input_report against the given uhid device, + * check that the program is not making infinite loops. + */ +TEST_F(hid_bpf, test_hid_infinite_loop_input_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_infinite_loop_input_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_output_report from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 3); + + /* read the data from hidraw: hid_bpf_try_input_report should work exactly one time */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 4); + + /* read the data from hidraw: there should be none */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, -1) TH_LOG("read_hidraw"); +} + +/* * Attach hid_insert{0,1,2} to the given uhid device, * retrieve and open the matching hidraw node, * inject one event in the uhid device, diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index f67d35def142..5ecc845ef792 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -14,8 +14,8 @@ struct attach_prog_args { __u64 callback_check = 52; __u64 callback2_check = 52; -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx) +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); @@ -29,8 +29,38 @@ int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx) return hid_ctx->size; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops first_event = { + .hid_device_event = (void *)hid_first_event, + .hid_id = 2, +}; + +int __hid_subprog_first_event(struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); + + if (!rw_data) + return 0; /* EPERM check */ + + rw_data[2] = rw_data[1] + 5; + + return hid_ctx->size; +} + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_subprog_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + return __hid_subprog_first_event(hid_ctx, type); +} + +SEC(".struct_ops.link") +struct hid_bpf_ops subprog_first_event = { + .hid_device_event = (void *)hid_subprog_first_event, + .hid_id = 2, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -42,8 +72,13 @@ int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx) return hid_ctx->size; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops second_event = { + .hid_device_event = (void *)hid_second_event, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); @@ -55,15 +90,10 @@ int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx) return 9; } -SEC("syscall") -int attach_prog(struct attach_prog_args *ctx) -{ - ctx->retval = hid_bpf_attach_prog(ctx->hid, - ctx->prog_fd, - ctx->insert_head ? HID_BPF_FLAG_INSERT_HEAD : - HID_BPF_FLAG_NONE); - return 0; -} +SEC(".struct_ops.link") +struct hid_bpf_ops change_report_id = { + .hid_device_event = (void *)hid_change_report_id, +}; struct hid_hw_request_syscall_args { /* data needs to come at offset 0 so we can use it in calls */ @@ -181,7 +211,12 @@ static const __u8 rdesc[] = { 0xc0, /* END_COLLECTION */ }; -SEC("?fmod_ret/hid_bpf_rdesc_fixup") +/* + * the following program is marked as sleepable (struct_ops.s). + * This is not strictly mandatory but is a nice test for + * sleepable struct_ops + */ +SEC("?struct_ops.s/hid_rdesc_fixup") int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4096 /* size */); @@ -200,8 +235,13 @@ int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx) return sizeof(rdesc) + 73; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops rdesc_fixup = { + .hid_rdesc_fixup = (void *)hid_rdesc_fixup, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -217,8 +257,14 @@ int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx) return 0; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops test_insert1 = { + .hid_device_event = (void *)hid_test_insert1, + .flags = BPF_F_BEFORE, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -234,8 +280,13 @@ int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx) return 0; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops test_insert2 = { + .hid_device_event = (void *)hid_test_insert2, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -250,3 +301,300 @@ int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx) return 0; } + +SEC(".struct_ops.link") +struct hid_bpf_ops test_insert3 = { + .hid_device_event = (void *)hid_test_insert3, +}; + +SEC("?struct_ops/hid_hw_request") +int BPF_PROG(hid_test_filter_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + return -20; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_filter_raw_request = { + .hid_hw_request = (void *)hid_test_filter_raw_request, +}; + +static struct file *current_file; + +SEC("fentry/hidraw_open") +int BPF_PROG(hidraw_open, struct inode *inode, struct file *file) +{ + current_file = file; + return 0; +} + +SEC("?struct_ops.s/hid_hw_request") +int BPF_PROG(hid_test_hidraw_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* check if the incoming request comes from our hidraw operation */ + if (source == (__u64)current_file) { + data[0] = reportnum; + + ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype); + if (ret != 2) + return -1; + data[0] = reportnum + 1; + data[1] = reportnum + 2; + data[2] = reportnum + 3; + return 3; + } + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_hidraw_raw_request = { + .hid_hw_request = (void *)hid_test_hidraw_raw_request, +}; + +SEC("?struct_ops.s/hid_hw_request") +int BPF_PROG(hid_test_infinite_loop_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* always forward the request as-is to the device, hid-bpf should prevent + * infinite loops. + */ + data[0] = reportnum; + + ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype); + if (ret == 2) + return 3; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_infinite_loop_raw_request = { + .hid_hw_request = (void *)hid_test_infinite_loop_raw_request, +}; + +SEC("?struct_ops/hid_hw_output_report") +int BPF_PROG(hid_test_filter_output_report, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + return -25; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_filter_output_report = { + .hid_hw_output_report = (void *)hid_test_filter_output_report, +}; + +SEC("?struct_ops.s/hid_hw_output_report") +int BPF_PROG(hid_test_hidraw_output_report, struct hid_bpf_ctx *hctx, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* check if the incoming request comes from our hidraw operation */ + if (source == (__u64)current_file) + return hid_bpf_hw_output_report(hctx, data, 2); + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_hidraw_output_report = { + .hid_hw_output_report = (void *)hid_test_hidraw_output_report, +}; + +SEC("?struct_ops.s/hid_hw_output_report") +int BPF_PROG(hid_test_infinite_loop_output_report, struct hid_bpf_ctx *hctx, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* always forward the request as-is to the device, hid-bpf should prevent + * infinite loops. + */ + + ret = hid_bpf_hw_output_report(hctx, data, 2); + if (ret == 2) + return 2; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_infinite_loop_output_report = { + .hid_hw_output_report = (void *)hid_test_infinite_loop_output_report, +}; + +struct elem { + struct bpf_wq work; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +static int wq_cb_sleepable(void *map, int *key, void *work) +{ + __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; + struct hid_bpf_ctx *hid_ctx; + + hid_ctx = hid_bpf_allocate_context(*key); + if (!hid_ctx) + return 0; /* EPERM check */ + + hid_bpf_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf)); + + hid_bpf_release_context(hid_ctx); + + return 0; +} + +static int test_inject_input_report_callback(int *key) +{ + struct elem init = {}, *val; + struct bpf_wq *wq; + + if (bpf_map_update_elem(&hmap, key, &init, 0)) + return -1; + + val = bpf_map_lookup_elem(&hmap, key); + if (!val) + return -2; + + wq = &val->work; + if (bpf_wq_init(wq, &hmap, 0) != 0) + return -3; + + if (bpf_wq_set_callback(wq, wq_cb_sleepable, 0)) + return -4; + + if (bpf_wq_start(wq, 0)) + return -5; + + return 0; +} + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_multiply_events_wq, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */); + int hid = hid_ctx->hid->id; + int ret; + + if (!data) + return 0; /* EPERM check */ + + if (data[0] != 1) + return 0; + + ret = test_inject_input_report_callback(&hid); + if (ret) + return ret; + + data[1] += 5; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_multiply_events_wq = { + .hid_device_event = (void *)hid_test_multiply_events_wq, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_multiply_events, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */); + __u8 buf[9]; + int ret; + + if (!data) + return 0; /* EPERM check */ + + if (data[0] != 1) + return 0; + + /* + * we have to use an intermediate buffer as hid_bpf_input_report + * will memset data to \0 + */ + __builtin_memcpy(buf, data, sizeof(buf)); + + buf[0] = 2; + buf[1] += 5; + ret = hid_bpf_try_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf)); + if (ret < 0) + return ret; + + /* + * In real world we should reset the original buffer as data might be garbage now, + * but it actually now has the content of 'buf' + */ + data[1] += 5; + + return 9; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_multiply_events = { + .hid_device_event = (void *)hid_test_multiply_events, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_infinite_loop_input_report, struct hid_bpf_ctx *hctx, + enum hid_report_type report_type, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 6 /* size */); + __u8 buf[6]; + + if (!data) + return 0; /* EPERM check */ + + /* + * we have to use an intermediate buffer as hid_bpf_input_report + * will memset data to \0 + */ + __builtin_memcpy(buf, data, sizeof(buf)); + + /* always forward the request as-is to the device, hid-bpf should prevent + * infinite loops. + * the return value is ignored so the event is passing to userspace. + */ + + hid_bpf_try_input_report(hctx, report_type, buf, sizeof(buf)); + + /* each time we process the event, we increment by one data[1]: + * after each successful call to hid_bpf_try_input_report, buf + * has been memcopied into data by the kernel. + */ + data[1] += 1; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_infinite_loop_input_report = { + .hid_device_event = (void *)hid_test_infinite_loop_input_report, +}; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 9cd56821d0f1..e5db897586bb 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -7,6 +7,7 @@ /* "undefine" structs and enums in vmlinux.h, because we "override" them below */ #define hid_bpf_ctx hid_bpf_ctx___not_used +#define hid_bpf_ops hid_bpf_ops___not_used #define hid_report_type hid_report_type___not_used #define hid_class_request hid_class_request___not_used #define hid_bpf_attach_flags hid_bpf_attach_flags___not_used @@ -20,13 +21,11 @@ #define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used #define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used #define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used -#define HID_BPF_FLAG_NONE HID_BPF_FLAG_NONE___not_used -#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used -#define HID_BPF_FLAG_MAX HID_BPF_FLAG_MAX___not_used #include "vmlinux.h" #undef hid_bpf_ctx +#undef hid_bpf_ops #undef hid_report_type #undef hid_class_request #undef hid_bpf_attach_flags @@ -40,9 +39,6 @@ #undef HID_REQ_SET_REPORT #undef HID_REQ_SET_IDLE #undef HID_REQ_SET_PROTOCOL -#undef HID_BPF_FLAG_NONE -#undef HID_BPF_FLAG_INSERT_HEAD -#undef HID_BPF_FLAG_MAX #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -57,10 +53,8 @@ enum hid_report_type { }; struct hid_bpf_ctx { - __u32 index; - const struct hid_device *hid; + struct hid_device *hid; __u32 allocated_size; - enum hid_report_type report_type; union { __s32 retval; __s32 size; @@ -76,17 +70,28 @@ enum hid_class_request { HID_REQ_SET_PROTOCOL = 0x0B, }; -enum hid_bpf_attach_flags { - HID_BPF_FLAG_NONE = 0, - HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), - HID_BPF_FLAG_MAX, +struct hid_bpf_ops { + int hid_id; + u32 flags; + struct list_head list; + int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type, + u64 source); + int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx); + int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, + u64 source); + int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, u64 source); + struct hid_device *hdev; }; +#ifndef BPF_F_BEFORE +#define BPF_F_BEFORE (1U << 3) +#endif + /* following are kfuncs exported by HID for HID-BPF */ extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz) __ksym; -extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym; extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, @@ -100,5 +105,18 @@ extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, size_t buf__sz) __ksym; +extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, + enum hid_report_type type, + __u8 *data, + size_t buf__sz) __ksym; + +/* bpf_wq implementation */ +extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; +extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; +extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, + int (callback_fn)(void *map, int *key, void *wq), + unsigned int flags__k, void *aux__ign) __ksym; +#define bpf_wq_set_callback(timer, cb, flags) \ + bpf_wq_set_callback_impl(timer, cb, flags, NULL) #endif /* __HID_BPF_HELPERS_H */ diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 05d66ef50c97..f45372cb00fe 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE +CFLAGS := $(CFLAGS) -Wall LDLIBS += -lm ARCH ?= $(shell uname -m 2>/dev/null || echo not) diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile index 32c5fdfd0eef..fd6477911f24 100644 --- a/tools/testing/selftests/iommu/Makefile +++ b/tools/testing/selftests/iommu/Makefile @@ -2,8 +2,6 @@ CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += $(KHDR_INCLUDES) -CFLAGS += -D_GNU_SOURCE - TEST_GEN_PROGS := TEST_GEN_PROGS += iommufd TEST_GEN_PROGS += iommufd_fail_nth diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index edf1c99c9936..6343f4053bd4 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) uint32_t parent_hwpt_id = 0; uint32_t parent_hwpt_id_not_work = 0; uint32_t test_hwpt_id = 0; + uint32_t iopf_hwpt_id; + uint32_t fault_id; + uint32_t fault_fd; if (self->device_id) { /* Negative tests */ @@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) sizeof(data)); /* Allocate two nested hwpts sharing one common parent hwpt */ + test_ioctl_fault_alloc(&fault_id, &fault_fd); test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0, &nested_hwpt_id[0], IOMMU_HWPT_DATA_SELFTEST, &data, @@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &nested_hwpt_id[1], IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id, + UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID, + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], IOMMU_TEST_IOTLB_DEFAULT); test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], @@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) _test_ioctl_destroy(self->fd, nested_hwpt_id[1])); test_ioctl_destroy(nested_hwpt_id[0]); + /* Switch from nested_hwpt_id[1] to iopf_hwpt_id */ + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + /* Trigger an IOPF on the device */ + test_cmd_trigger_iopf(self->device_id, fault_fd); + /* Detach from nested_hwpt_id[1] and destroy it */ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); test_ioctl_destroy(nested_hwpt_id[1]); + test_ioctl_destroy(iopf_hwpt_id); /* Detach from the parent hw_pagetable and destroy it */ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(parent_hwpt_id); test_ioctl_destroy(parent_hwpt_id_not_work); + close(fault_fd); + test_ioctl_destroy(fault_id); } else { test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0, &parent_hwpt_id); @@ -1722,10 +1744,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + unsigned long size; int mmap_flags; void *vrc; int rc; + if (variant->buffer_size < MOCK_PAGE_SIZE) { + SKIP(return, + "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu", + variant->buffer_size, MOCK_PAGE_SIZE); + } + self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); @@ -1749,12 +1778,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking) assert(vrc == self->buffer); self->page_size = MOCK_PAGE_SIZE; - self->bitmap_size = - variant->buffer_size / self->page_size / BITS_PER_BYTE; + self->bitmap_size = variant->buffer_size / self->page_size; /* Provision with an extra (PAGE_SIZE) for the unaligned case */ - rc = posix_memalign(&self->bitmap, PAGE_SIZE, - self->bitmap_size + PAGE_SIZE); + size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE); + rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE); assert(!rc); assert(self->bitmap); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); @@ -1775,51 +1803,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, self->bitmap_size); + munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); teardown_iommufd(self->fd, _metadata); } -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k) +{ + /* half of an u8 index bitmap */ + .buffer_size = 8UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k) +{ + /* one u8 index bitmap */ + .buffer_size = 16UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k) { /* one u32 index bitmap */ - .buffer_size = 128UL * 1024UL, + .buffer_size = 64UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) { /* one u64 index bitmap */ - .buffer_size = 256UL * 1024UL, + .buffer_size = 128UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k) { /* two u64 index and trailing end bitmap */ - .buffer_size = 640UL * 1024UL, + .buffer_size = 320UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M) { - /* 4K bitmap (128M IOVA range) */ - .buffer_size = 128UL * 1024UL * 1024UL, + /* 4K bitmap (64M IOVA range) */ + .buffer_size = 64UL * 1024UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge) { - /* 4K bitmap (128M IOVA range) */ - .buffer_size = 128UL * 1024UL * 1024UL, + /* 4K bitmap (64M IOVA range) */ + .buffer_size = 64UL * 1024UL * 1024UL, .hugepages = true, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) { - /* 8K bitmap (256M IOVA range) */ - .buffer_size = 256UL * 1024UL * 1024UL, + /* 8K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) { - /* 8K bitmap (256M IOVA range) */ - .buffer_size = 256UL * 1024UL * 1024UL, + /* 8K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, .hugepages = true, }; diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index f590417cd67a..c5d5e69452b0 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id, + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 8d2b46b2114d..40f6f14ce136 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -22,6 +22,8 @@ #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + static inline void set_bit(unsigned int nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); @@ -153,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ pt_id, NULL)) -static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, +static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id, __u32 flags, __u32 *hwpt_id, __u32 data_type, void *data, size_t data_len) { @@ -165,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, .data_type = data_type, .data_len = data_len, .data_uptr = (uint64_t)data, + .fault_id = ft_id, }; int ret; @@ -177,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, } #define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \ 0)) #define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \ - self->fd, device_id, pt_id, flags, \ + self->fd, device_id, pt_id, 0, flags, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0)) #define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \ data_type, data, data_len) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, data_type, data, data_len)) #define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \ data_type, data, data_len) \ EXPECT_ERRNO(_errno, \ - _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, data_type, data, data_len)) +#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \ + data_type, data, data_len) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \ + flags, hwpt_id, data_type, data, \ + data_len)) +#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \ + hwpt_id, data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \ + flags, hwpt_id, data_type, data, \ + data_len)) + #define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \ ({ \ struct iommu_test_cmd test_cmd = { \ @@ -346,12 +361,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 iova, size_t page_size, size_t pte_page_size, __u64 *bitmap, - __u64 bitmap_size, __u32 flags, + __u64 nbits, __u32 flags, struct __test_metadata *_metadata) { unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; - unsigned long nbits = bitmap_size * BITS_PER_BYTE; unsigned long j, i, nr = nbits / pteset ?: 1; + unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE); __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ @@ -684,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, #define test_cmd_get_hw_capabilities(device_id, caps, mask) \ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) + +static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) +{ + struct iommu_fault_alloc cmd = { + .size = sizeof(cmd), + }; + int ret; + + ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd); + if (ret) + return ret; + *fault_id = cmd.out_fault_id; + *fault_fd = cmd.out_fault_fd; + return 0; +} + +#define test_ioctl_fault_alloc(fault_id, fault_fd) \ + ({ \ + ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \ + fault_fd)); \ + ASSERT_NE(0, *(fault_id)); \ + ASSERT_NE(0, *(fault_fd)); \ + }) + +static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) +{ + struct iommu_test_cmd trigger_iopf_cmd = { + .size = sizeof(trigger_iopf_cmd), + .op = IOMMU_TEST_OP_TRIGGER_IOPF, + .trigger_iopf = { + .dev_id = device_id, + .pasid = 0x1, + .grpid = 0x2, + .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE, + .addr = 0xdeadbeaf, + }, + }; + struct iommu_hwpt_page_response response = { + .code = IOMMUFD_PAGE_RESP_SUCCESS, + }; + struct iommu_hwpt_pgfault fault = {}; + ssize_t bytes; + int ret; + + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF), &trigger_iopf_cmd); + if (ret) + return ret; + + bytes = read(fault_fd, &fault, sizeof(fault)); + if (bytes <= 0) + return -EIO; + + response.cookie = fault.cookie; + + bytes = write(fault_fd, &response, sizeof(response)); + if (bytes <= 0) + return -EIO; + + return 0; +} + +#define test_cmd_trigger_iopf(device_id, fault_fd) \ + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 76c2a6945d3e..b8967b6e29d5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -168,15 +168,7 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...) static inline void ksft_perror(const char *msg) { -#ifndef NOLIBC ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); -#else - /* - * nolibc doesn't provide strerror() and it seems - * inappropriate to add one, just print the errno. - */ - ksft_print_msg("%s: %d)\n", msg, errno); -#endif } static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...) diff --git a/tools/testing/selftests/devices/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e..cd89fb2bc10e 100644 --- a/tools/testing/selftests/devices/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b634969cbb6f..40723a6a083f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -66,8 +66,6 @@ #include <sys/wait.h> #include <unistd.h> #include <setjmp.h> -#include <syscall.h> -#include <linux/sched.h> #include "kselftest.h" @@ -82,17 +80,6 @@ # define TH_LOG_ENABLED 1 #endif -/* Wait for the child process to end but without sharing memory mapping. */ -static inline pid_t clone3_vfork(void) -{ - struct clone_args args = { - .flags = CLONE_VFORK, - .exit_signal = SIGCHLD, - }; - - return syscall(__NR_clone3, &args, sizeof(args)); -} - /** * TH_LOG() * @@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void) } \ if (setjmp(_metadata->env) == 0) { \ /* _metadata and potentially self are shared with all forks. */ \ - child = clone3_vfork(); \ + child = fork(); \ if (child == 0) { \ fixture_name##_setup(_metadata, self, variant->data); \ /* Let setup failure terminate early. */ \ @@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t) .sa_flags = SA_SIGINFO, }; struct sigaction saved_action; - int status; + /* + * Sets status so that WIFEXITED(status) returns true and + * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value + * should never be evaluated because of the waitpid(2) check and + * SIGALRM handling. + */ + int status = KSFT_FAIL << 8; + int child; if (sigaction(SIGALRM, &action, &saved_action)) { t->exit_code = KSFT_FAIL; @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t) __active_test = t; t->timed_out = false; alarm(t->timeout); - waitpid(t->pid, &status, 0); + child = waitpid(t->pid, &status, 0); + if (child == -1 && errno != EINTR) { + t->exit_code = KSFT_FAIL; + fprintf(TH_LOG_STREAM, + "# %s: Failed to wait for PID %d (errno: %d)\n", + t->name, t->pid, errno); + return; + } + alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { t->exit_code = KSFT_FAIL; @@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t) WTERMSIG(status)); } } else { + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test ended in some other way [%u]\n", t->name, @@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_xfail *xfail; char test_name[1024]; const char *diagnostic; + int child; /* reset test struct */ t->exit_code = KSFT_PASS; @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f, fflush(stdout); fflush(stderr); - t->pid = clone3_vfork(); - if (t->pid < 0) { + child = fork(); + if (child < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->exit_code = KSFT_FAIL; - } else if (t->pid == 0) { + } else if (child == 0) { setpgrp(); t->fn(t, variant); _exit(t->exit_code); } else { + t->pid = child; __wait_for_test(t); } ksft_print_msg(" %4s %s\n", diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ac280dcba996..48d32c5aa3eb 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -112,6 +112,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test @@ -145,6 +146,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test TEST_GEN_PROGS_x86_64 += system_counter_offset_test +TEST_GEN_PROGS_x86_64 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test @@ -231,7 +233,7 @@ LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -D_GNU_SOURCE -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index a7de39fa2a0a..d20981663831 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -219,6 +219,7 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_CTR_EL0); GUEST_DONE(); } @@ -490,11 +491,25 @@ static void test_clidr(struct kvm_vcpu *vcpu) test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; } +static void test_ctr(struct kvm_vcpu *vcpu) +{ + u64 ctr; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr); + ctr &= ~CTR_EL0_DIC_MASK; + if (ctr & CTR_EL0_IminLine_MASK) + ctr--; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr); + test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; +} + static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) { u64 val; test_clidr(vcpu); + test_ctr(vcpu); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); val++; @@ -524,7 +539,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) for (int i = 0; i < ARRAY_SIZE(test_regs); i++) test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); ksft_test_result_pass("%s\n", __func__); } diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index bed316fdecd5..0f268b55fa06 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -60,6 +60,14 @@ #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 #define SET_APIC_DEST_FIELD(x) ((x) << 24) +#define APIC_LVTT 0x320 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) +#define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) +#define APIC_LVT_MASKED (1 << 16) +#define APIC_TMICT 0x380 +#define APIC_TMCCT 0x390 +#define APIC_TDCR 0x3E0 void apic_disable(void); void xapic_enable(void); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8eb57de0b587..a0c1440017bb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -23,6 +23,7 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; +extern uint64_t guest_tsc_khz; /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -277,6 +278,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) #define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) @@ -815,6 +817,23 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } +static inline void udelay(unsigned long usec) +{ + uint64_t start, now, cycles; + + GUEST_ASSERT(guest_tsc_khz); + cycles = guest_tsc_khz / 1000 * usec; + + /* + * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is + * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits. + */ + start = rdtsc(); + do { + now = rdtsc(); + } while (now - start < cycles); +} + #define ud2() \ __asm__ __volatile__( \ "ud2\n" \ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index ad00e4761886..56b170b725b3 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -21,6 +21,7 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; +static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); @@ -434,7 +435,10 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - pr_info("Random seed: 0x%x\n", guest_random_seed); + if (guest_random_seed != last_guest_seed) { + pr_info("Random seed: 0x%x\n", guest_random_seed); + last_guest_seed = guest_random_seed; + } guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); @@ -2319,7 +2323,8 @@ void __attribute((constructor)) kvm_selftest_init(void) /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); - guest_random_seed = random(); + guest_random_seed = last_guest_seed = random(); + pr_info("Random seed: 0x%x\n", guest_random_seed); kvm_selftest_arch_init(); } diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 14ee17151a59..b5035c63d516 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -9,6 +9,7 @@ #include "kvm_util.h" #include "processor.h" +#include "sbi.h" void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c664e446136b..153739f2e201 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -25,6 +25,7 @@ vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; bool is_forced_emulation_enabled; +uint64_t guest_tsc_khz; static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -616,6 +617,11 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) void kvm_arch_vm_post_create(struct kvm_vm *vm) { + int r; + + TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ), + "Require KVM_GET_TSC_KHZ to provide udelay() to guest."); + vm_create_irqchip(vm); vm_init_descriptor_tables(vm); @@ -628,6 +634,11 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } + + r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL); + TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency."); + guest_tsc_khz = r; + sync_global_to_guest(vm, guest_tsc_khz); } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -1247,9 +1258,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr; + uint8_t maxphyaddr, guest_maxphyaddr; + + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. + */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); - max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ if (!host_cpu_is_amd) diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 05fcf902e067..49f162573126 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -53,12 +53,6 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } } -struct memslot_antagonist_args { - struct kvm_vm *vm; - useconds_t delay; - uint64_t nr_modifications; -}; - static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, uint64_t nr_modifications) { diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c new file mode 100644 index 000000000000..0350a8896a2f --- /dev/null +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024, Intel, Inc + * + * Author: + * Isaku Yamahata <isaku.yamahata at gmail.com> + */ +#include <linux/sizes.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +/* Arbitrarily chosen values */ +#define TEST_SIZE (SZ_2M + PAGE_SIZE) +#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) +#define TEST_SLOT 10 + +static void guest_code(uint64_t base_gpa) +{ + volatile uint64_t val __used; + int i; + + for (i = 0; i < TEST_NPAGES; i++) { + uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE); + + val = *src; + } + + GUEST_DONE(); +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, + u64 left) +{ + struct kvm_pre_fault_memory range = { + .gpa = gpa, + .size = size, + .flags = 0, + }; + u64 prev; + int ret, save_errno; + + do { + prev = range.size; + ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); + save_errno = errno; + TEST_ASSERT((range.size < prev) ^ (ret < 0), + "%sexpecting range.size to change on %s", + ret < 0 ? "not " : "", + ret < 0 ? "failure" : "success"); + } while (ret >= 0 ? range.size : save_errno == EINTR); + + TEST_ASSERT(range.size == left, + "Completed with %lld bytes left, expected %" PRId64, + range.size, left); + + if (left == 0) + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + else + /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */ + __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); +} + +static void __test_pre_fault_memory(unsigned long vm_type, bool private) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = vm_type, + }; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + uint64_t guest_test_phys_mem; + uint64_t guest_test_virt_mem; + uint64_t alignment, guest_page_size; + + vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); + + alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size; +#ifdef __s390x__ + alignment = max(0x100000UL, guest_page_size); +#else + alignment = SZ_2M; +#endif + guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); + guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, TEST_SLOT, TEST_NPAGES, + private ? KVM_MEM_GUEST_MEMFD : 0); + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES); + + if (private) + vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); + pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0); + pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE); + pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE); + + vcpu_args_set(vcpu, 1, guest_test_virt_mem); + vcpu_run(vcpu); + + run = vcpu->run; + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + break; + } + + kvm_vm_free(vm); +} + +static void test_pre_fault_memory(unsigned long vm_type, bool private) +{ + if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) { + pr_info("Skipping tests for vm_type 0x%lx\n", vm_type); + return; + } + + __test_pre_fault_memory(vm_type, private); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY)); + + test_pre_fault_memory(0, false); +#ifdef __x86_64__ + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false); + test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true); +#endif + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 823c132069b4..0e0712854953 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -6,6 +6,7 @@ * */ #include "kvm_util.h" +#include "ucall_common.h" #define LABEL_ADDRESS(v) ((uint64_t)&(v)) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 222198dd6d04..f92c2fb23fcd 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -49,6 +49,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: @@ -56,6 +57,11 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: @@ -68,6 +74,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH: @@ -415,6 +422,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZBC), @@ -422,6 +430,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZCA), + KVM_ISA_EXT_ARR(ZCB), + KVM_ISA_EXT_ARR(ZCD), + KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), @@ -434,6 +447,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZIMOP), KVM_ISA_EXT_ARR(ZKND), KVM_ISA_EXT_ARR(ZKNE), KVM_ISA_EXT_ARR(ZKNH), @@ -939,6 +953,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); @@ -946,6 +961,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); @@ -958,6 +978,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH); @@ -995,6 +1016,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svnapot, &config_svpbmt, &config_zacas, + &config_zawrs, &config_zba, &config_zbb, &config_zbc, @@ -1002,6 +1024,11 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zbkc, &config_zbkx, &config_zbs, + &config_zca, + &config_zcb, + &config_zcd, + &config_zcf, + &config_zcmop, &config_zfa, &config_zfh, &config_zfhmin, @@ -1014,6 +1041,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zihintntl, &config_zihintpause, &config_zihpm, + &config_zimop, &config_zknd, &config_zkne, &config_zknh, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 69bb94e6b227..f299cbfd23ca 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -15,6 +15,7 @@ #include "processor.h" #include "sbi.h" #include "arch_timer.h" +#include "ucall_common.h" /* Maximum counters(firmware + hardware) */ #define RISCV_MAX_PMU_COUNTERS 64 diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c new file mode 100644 index 000000000000..f8916bb34405 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 Intel Corporation + * + * Verify KVM correctly emulates the APIC bus frequency when the VMM configures + * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by + * programming TMICT (timer initial count) to the largest value possible (so + * that the timer will not expire during the test). Then, after an arbitrary + * amount of time has elapsed, verify TMCCT (timer current count) is within 1% + * of the expected value based on the time elapsed, the APIC bus frequency, and + * the programmed TDCR (timer divide configuration register). + */ + +#include "apic.h" +#include "test_util.h" + +/* + * Possible TDCR values with matching divide count. Used to modify APIC + * timer frequency. + */ +static const struct { + const uint32_t tdcr; + const uint32_t divide_count; +} tdcrs[] = { + {0x0, 2}, + {0x1, 4}, + {0x2, 8}, + {0x3, 16}, + {0x8, 32}, + {0x9, 64}, + {0xa, 128}, + {0xb, 1}, +}; + +static bool is_x2apic; + +static void apic_enable(void) +{ + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); +} + +static uint32_t apic_read_reg(unsigned int reg) +{ + return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); +} + +static void apic_write_reg(unsigned int reg, uint32_t val) +{ + if (is_x2apic) + x2apic_write_reg(reg, val); + else + xapic_write_reg(reg, val); +} + +static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +{ + uint64_t tsc_hz = guest_tsc_khz * 1000; + const uint32_t tmict = ~0u; + uint64_t tsc0, tsc1, freq; + uint32_t tmcct; + int i; + + apic_enable(); + + /* + * Setup one-shot timer. The vector does not matter because the + * interrupt should not fire. + */ + apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED); + + for (i = 0; i < ARRAY_SIZE(tdcrs); i++) { + apic_write_reg(APIC_TDCR, tdcrs[i].tdcr); + apic_write_reg(APIC_TMICT, tmict); + + tsc0 = rdtsc(); + udelay(delay_ms * 1000); + tmcct = apic_read_reg(APIC_TMCCT); + tsc1 = rdtsc(); + + /* + * Stop the timer _after_ reading the current, final count, as + * writing the initial counter also modifies the current count. + */ + apic_write_reg(APIC_TMICT, 0); + + freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0); + /* Check if measured frequency is within 5% of configured frequency. */ + __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100, + "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu", + freq, apic_hz * 95 / 100, apic_hz * 105 / 100, + apic_hz, tdcrs[i].divide_count, tsc_hz); + } + + GUEST_DONE(); +} + +static void test_apic_bus_clock(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + + while (!done) { + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } + } +} + +static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, + bool x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int ret; + + is_x2apic = x2apic; + + vm = vm_create(1); + + sync_global_to_guest(vm, is_x2apic); + + vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); + vcpu_args_set(vcpu, 2, apic_hz, delay_ms); + + ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + NSEC_PER_SEC / apic_hz); + TEST_ASSERT(ret < 0 && errno == EINVAL, + "Setting of APIC bus frequency after vCPU is created should fail."); + + if (!is_x2apic) + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + test_apic_bus_clock(vcpu); + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name); + puts(""); + printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n"); + printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + /* + * Arbitrarilty default to 25MHz for the APIC bus frequency, which is + * different enough from the default 1GHz to be interesting. + */ + uint64_t apic_hz = 25 * 1000 * 1000; + uint64_t delay_ms = 100; + int opt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); + + while ((opt = getopt(argc, argv, "d:f:h")) != -1) { + switch (opt) { + case 'f': + apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000; + break; + case 'd': + delay_ms = atoi_positive("Delay in milliseconds", optarg); + break; + case 'h': + default: + help(argv[0]); + exit(KSFT_SKIP); + } + } + + run_apic_bus_clock_test(apic_hz, delay_ms, false); + run_apic_bus_clock_test(apic_hz, delay_ms, true); +} diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c index 3cc4b86832fe..7e2bfb3c3f3b 100644 --- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c +++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c @@ -26,19 +26,37 @@ int main(int argc, char *argv[]) TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); + /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */ + if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) { + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID); + + /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail"); + } + /* Set KVM_CAP_MAX_VCPU_ID */ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); - /* Try to set KVM_CAP_MAX_VCPU_ID again */ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); TEST_ASSERT(ret < 0, "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); - /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/ + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); + /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32)); + TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail"); + + /* Create vCPU with id within bounds */ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0); + TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed"); + + close(ret); kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 96446134c00b..698cb36989db 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -7,15 +7,28 @@ #include "pmu.h" #include "processor.h" -/* Number of LOOP instructions for the guest measurement payload. */ -#define NUM_BRANCHES 10 +/* Number of iterations of the loop for the guest measurement payload. */ +#define NUM_LOOPS 10 + +/* Each iteration of the loop retires one branch instruction. */ +#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) + +/* + * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, + * 1 LOOP. + */ +#define NUM_INSNS_PER_LOOP 3 + /* * Number of "extra" instructions that will be counted, i.e. the number of - * instructions that are needed to set up the loop and then disabled the - * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR. + * instructions that are needed to set up the loop and then disable the + * counter. 2 MOV, 2 XOR, 1 WRMSR. */ -#define NUM_EXTRA_INSNS 7 -#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS) +#define NUM_EXTRA_INSNS 5 + +/* Total number of instructions retired within the measured section. */ +#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) + static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; @@ -100,7 +113,7 @@ static void guest_assert_event_count(uint8_t idx, GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCHES); + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -120,7 +133,7 @@ static void guest_assert_event_count(uint8_t idx, } sanity_checks: - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); GUEST_ASSERT_EQ(_rdpmc(pmc), count); wrmsr(pmc_msr, 0xdead); @@ -134,8 +147,8 @@ sanity_checks: * before the end of the sequence. * * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the - * start of the loop to force LLC references and misses, i.e. to allow testing - * that those events actually count. + * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and + * misses, i.e. to allow testing that those events actually count. * * If forced emulation is enabled (and specified), force emulation on a subset * of the measured code to verify that KVM correctly emulates instructions and @@ -145,10 +158,11 @@ sanity_checks: #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \ do { \ __asm__ __volatile__("wrmsr\n\t" \ + " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ + "1:\n\t" \ clflush "\n\t" \ "mfence\n\t" \ - "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \ - FEP "loop .\n\t" \ + FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ @@ -163,9 +177,9 @@ do { \ wrmsr(pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \ @@ -500,7 +514,7 @@ static void guest_test_fixed_counters(void) wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0); wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS})); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 26b3e7efe5dd..c15513cd74d1 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -32,8 +32,8 @@ struct __kvm_pmu_event_filter { /* * This event list comprises Intel's known architectural events, plus AMD's - * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs). - * Note, AMD and Intel use the same encoding for instructions retired. + * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the + * same encoding for Instructions Retired. */ kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED); @@ -353,38 +353,13 @@ static bool use_intel_pmu(void) kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED); } -static bool is_zen1(uint32_t family, uint32_t model) -{ - return family == 0x17 && model <= 0x0f; -} - -static bool is_zen2(uint32_t family, uint32_t model) -{ - return family == 0x17 && model >= 0x30 && model <= 0x3f; -} - -static bool is_zen3(uint32_t family, uint32_t model) -{ - return family == 0x19 && model <= 0x0f; -} - /* - * Determining AMD support for a PMU event requires consulting the AMD - * PPR for the CPU or reference material derived therefrom. The AMD - * test code herein has been verified to work on Zen1, Zen2, and Zen3. - * - * Feel free to add more AMD CPUs that are documented to support event - * select 0xc2 umask 0 as "retired branch instructions." + * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding + * 0xc2,0 for Branch Instructions Retired. */ static bool use_amd_pmu(void) { - uint32_t family = kvm_cpu_family(); - uint32_t model = kvm_cpu_model(); - - return host_cpu_is_amd && - (is_zen1(family, model) || - is_zen2(family, model) || - is_zen3(family, model)); + return host_cpu_is_amd && kvm_cpu_family() >= 0x17; } /* diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index d691d86e5bc3..49913784bc82 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -33,6 +33,20 @@ static void guest_not_bsp_vcpu(void *arg) GUEST_DONE(); } +static void test_set_invalid_bsp(struct kvm_vm *vm) +{ + unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + int r; + + if (max_vcpu_id) { + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail"); + } + + r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32)); + TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail"); +} + static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) { int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, @@ -80,6 +94,8 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, vm = vm_create(nr_vcpus); + test_set_invalid_bsp(vm); + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c index 7a4a61be119b..3fb967f40c6a 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -105,11 +105,11 @@ void test_features(uint32_t vm_type, uint64_t supported_features) int i; for (i = 0; i < 64; i++) { - if (!(supported_features & (1u << i))) + if (!(supported_features & BIT_ULL(i))) test_init2_invalid(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, "unknown feature"); - else if (KNOWN_FEATURES & (1u << i)) + else if (KNOWN_FEATURES & BIT_ULL(i)) test_init2(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); } diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 3c1e9f35b531..3b26bf3cf5b9 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> +#include <linux/keyctl.h> #include <linux/landlock.h> #include <string.h> #include <sys/prctl.h> @@ -326,4 +327,77 @@ TEST(ruleset_fd_transfer) ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); } +TEST(cred_transfer) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + int ruleset_fd, dir_fd; + pid_t child; + int status; + + drop_caps(_metadata); + + dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + EXPECT_LE(0, dir_fd); + EXPECT_EQ(0, close(dir_fd)); + + /* Denies opening directories. */ + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0, + 0, 0)) + { + TH_LOG("Failed to join session keyring: %s", strerror(errno)); + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* + * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a + * different session keyring in the child, so make that happen. + */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, + NULL, 0, 0, 0)); + + /* + * KEYCTL_SESSION_TO_PARENT installs credentials on the parent + * that never go through the cred_prepare hook, this path uses + * cred_transfer instead. + */ + EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0, + 0, 0, 0)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + _exit(_metadata->exit_code); + return; + } + + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_EQ(1, WIFEXITED(status)); + EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 0086efaa7b68..29af19c4e9f9 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -2,6 +2,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y +CONFIG_KEYS=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 429535816dbd..d6edcfcb5be8 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -38,6 +38,14 @@ else CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) endif # CROSS_COMPILE +# gcc defaults to silence (off) for the following warnings, but clang defaults +# to the opposite. The warnings are not useful for the kernel itself, which is +# why they have remained disabled in gcc for the main kernel build. And it is +# only due to including kernel data structures in the selftests, that we get the +# warnings from clang. Therefore, disable the warnings for clang builds. +CFLAGS += -Wno-address-of-packed-member +CFLAGS += -Wno-gnu-variable-sized-type-not-at-end + CC := $(CLANG) $(CLANG_FLAGS) -fintegrated-as else CC := $(CROSS_COMPILE)gcc @@ -188,6 +196,9 @@ endef clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir) $(CLEAN) +# Build with _GNU_SOURCE by default +CFLAGS += -D_GNU_SOURCE= + # Enables to extend CFLAGS and LDFLAGS from command line, e.g. # make USERCFLAGS=-Werror USERLDFLAGS=-static CFLAGS += $(USERCFLAGS) diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index e3455a6b1158..65c9c058458d 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -4,7 +4,9 @@ . $(dirname $0)/functions.sh -MOD_LIVEPATCH=test_klp_livepatch +MOD_LIVEPATCH1=test_klp_livepatch +MOD_LIVEPATCH2=test_klp_syscall +MOD_LIVEPATCH3=test_klp_callbacks_demo MOD_REPLACE=test_klp_atomic_replace setup_config @@ -16,33 +18,33 @@ setup_config start_test "basic function patching" -load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH1 -if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then +if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH1: this has been live patched" ]] ; then echo -e "FAIL\n\n" die "livepatch kselftest(s) failed" fi -disable_lp $MOD_LIVEPATCH -unload_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH1 +unload_lp $MOD_LIVEPATCH1 -if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then +if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH1: this has been live patched" ]] ; then echo -e "FAIL\n\n" die "livepatch kselftest(s) failed" fi -check_result "% insmod test_modules/$MOD_LIVEPATCH.ko -livepatch: enabling patch '$MOD_LIVEPATCH' -livepatch: '$MOD_LIVEPATCH': initializing patching transition -livepatch: '$MOD_LIVEPATCH': starting patching transition -livepatch: '$MOD_LIVEPATCH': completing patching transition -livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled -livepatch: '$MOD_LIVEPATCH': initializing unpatching transition -livepatch: '$MOD_LIVEPATCH': starting unpatching transition -livepatch: '$MOD_LIVEPATCH': completing unpatching transition -livepatch: '$MOD_LIVEPATCH': unpatching complete -% rmmod $MOD_LIVEPATCH" +check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko +livepatch: enabling patch '$MOD_LIVEPATCH1' +livepatch: '$MOD_LIVEPATCH1': initializing patching transition +livepatch: '$MOD_LIVEPATCH1': starting patching transition +livepatch: '$MOD_LIVEPATCH1': completing patching transition +livepatch: '$MOD_LIVEPATCH1': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH1': starting unpatching transition +livepatch: '$MOD_LIVEPATCH1': completing unpatching transition +livepatch: '$MOD_LIVEPATCH1': unpatching complete +% rmmod $MOD_LIVEPATCH1" # - load a livepatch that modifies the output from /proc/cmdline and @@ -53,7 +55,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete start_test "multiple livepatches" -load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg @@ -69,26 +71,26 @@ unload_lp $MOD_REPLACE grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -disable_lp $MOD_LIVEPATCH -unload_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH1 +unload_lp $MOD_LIVEPATCH1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -check_result "% insmod test_modules/$MOD_LIVEPATCH.ko -livepatch: enabling patch '$MOD_LIVEPATCH' -livepatch: '$MOD_LIVEPATCH': initializing patching transition -livepatch: '$MOD_LIVEPATCH': starting patching transition -livepatch: '$MOD_LIVEPATCH': completing patching transition -livepatch: '$MOD_LIVEPATCH': patching complete -$MOD_LIVEPATCH: this has been live patched +check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko +livepatch: enabling patch '$MOD_LIVEPATCH1' +livepatch: '$MOD_LIVEPATCH1': initializing patching transition +livepatch: '$MOD_LIVEPATCH1': starting patching transition +livepatch: '$MOD_LIVEPATCH1': completing patching transition +livepatch: '$MOD_LIVEPATCH1': patching complete +$MOD_LIVEPATCH1: this has been live patched % insmod test_modules/$MOD_REPLACE.ko replace=0 livepatch: enabling patch '$MOD_REPLACE' livepatch: '$MOD_REPLACE': initializing patching transition livepatch: '$MOD_REPLACE': starting patching transition livepatch: '$MOD_REPLACE': completing patching transition livepatch: '$MOD_REPLACE': patching complete -$MOD_LIVEPATCH: this has been live patched +$MOD_LIVEPATCH1: this has been live patched $MOD_REPLACE: this has been live patched % echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition @@ -96,35 +98,57 @@ livepatch: '$MOD_REPLACE': starting unpatching transition livepatch: '$MOD_REPLACE': completing unpatching transition livepatch: '$MOD_REPLACE': unpatching complete % rmmod $MOD_REPLACE -$MOD_LIVEPATCH: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled -livepatch: '$MOD_LIVEPATCH': initializing unpatching transition -livepatch: '$MOD_LIVEPATCH': starting unpatching transition -livepatch: '$MOD_LIVEPATCH': completing unpatching transition -livepatch: '$MOD_LIVEPATCH': unpatching complete -% rmmod $MOD_LIVEPATCH" +$MOD_LIVEPATCH1: this has been live patched +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH1': starting unpatching transition +livepatch: '$MOD_LIVEPATCH1': completing unpatching transition +livepatch: '$MOD_LIVEPATCH1': unpatching complete +% rmmod $MOD_LIVEPATCH1" # - load a livepatch that modifies the output from /proc/cmdline and # verify correct behavior -# - load an atomic replace livepatch and verify that only the second is active -# - remove the first livepatch and verify that the atomic replace livepatch -# is still active +# - load two additional livepatches and check the number of livepatch modules +# applied +# - load an atomic replace livepatch and check that the other three modules were +# disabled +# - remove all livepatches besides the atomic replace one and verify that the +# atomic replace livepatch is still active # - remove the atomic replace livepatch and verify that none are active start_test "atomic replace livepatch" -load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg +for mod in $MOD_LIVEPATCH2 $MOD_LIVEPATCH3; do + load_lp "$mod" +done + +mods=(/sys/kernel/livepatch/*) +nmods=${#mods[@]} +if [ "$nmods" -ne 3 ]; then + die "Expecting three modules listed, found $nmods" +fi + load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -unload_lp $MOD_LIVEPATCH +mods=(/sys/kernel/livepatch/*) +nmods=${#mods[@]} +if [ "$nmods" -ne 1 ]; then + die "Expecting only one moduled listed, found $nmods" +fi + +# These modules were disabled by the atomic replace +for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do + unload_lp "$mod" +done grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg @@ -135,13 +159,27 @@ unload_lp $MOD_REPLACE grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -check_result "% insmod test_modules/$MOD_LIVEPATCH.ko -livepatch: enabling patch '$MOD_LIVEPATCH' -livepatch: '$MOD_LIVEPATCH': initializing patching transition -livepatch: '$MOD_LIVEPATCH': starting patching transition -livepatch: '$MOD_LIVEPATCH': completing patching transition -livepatch: '$MOD_LIVEPATCH': patching complete -$MOD_LIVEPATCH: this has been live patched +check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko +livepatch: enabling patch '$MOD_LIVEPATCH1' +livepatch: '$MOD_LIVEPATCH1': initializing patching transition +livepatch: '$MOD_LIVEPATCH1': starting patching transition +livepatch: '$MOD_LIVEPATCH1': completing patching transition +livepatch: '$MOD_LIVEPATCH1': patching complete +$MOD_LIVEPATCH1: this has been live patched +% insmod test_modules/$MOD_LIVEPATCH2.ko +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +livepatch: '$MOD_LIVEPATCH2': patching complete +% insmod test_modules/$MOD_LIVEPATCH3.ko +livepatch: enabling patch '$MOD_LIVEPATCH3' +livepatch: '$MOD_LIVEPATCH3': initializing patching transition +$MOD_LIVEPATCH3: pre_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH3': starting patching transition +livepatch: '$MOD_LIVEPATCH3': completing patching transition +$MOD_LIVEPATCH3: post_patch_callback: vmlinux +livepatch: '$MOD_LIVEPATCH3': patching complete % insmod test_modules/$MOD_REPLACE.ko replace=1 livepatch: enabling patch '$MOD_REPLACE' livepatch: '$MOD_REPLACE': initializing patching transition @@ -149,7 +187,9 @@ livepatch: '$MOD_REPLACE': starting patching transition livepatch: '$MOD_REPLACE': completing patching transition livepatch: '$MOD_REPLACE': patching complete $MOD_REPLACE: this has been live patched -% rmmod $MOD_LIVEPATCH +% rmmod $MOD_LIVEPATCH3 +% rmmod $MOD_LIVEPATCH2 +% rmmod $MOD_LIVEPATCH1 $MOD_REPLACE: this has been live patched % echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh index b76a881d4013..289eb7d4c4b3 100755 --- a/tools/testing/selftests/livepatch/test-syscall.sh +++ b/tools/testing/selftests/livepatch/test-syscall.sh @@ -15,7 +15,10 @@ setup_config start_test "patch getpid syscall while being heavily hammered" -for i in $(seq 1 $(getconf _NPROCESSORS_ONLN)); do +NPROC=$(getconf _NPROCESSORS_ONLN) +MAXPROC=128 + +for i in $(seq 1 $(($NPROC < $MAXPROC ? $NPROC : $MAXPROC))); do ./test_klp-call_getpid & pids[$i]="$!" done diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 6c646afa7395..05a14f5a7bfb 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -18,6 +18,7 @@ check_sysfs_rights "$MOD_LIVEPATCH" "" "drwxr-xr-x" check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--" check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1" check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------" +check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--" check_sysfs_value "$MOD_LIVEPATCH" "transition" "0" check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--" @@ -83,4 +84,51 @@ test_klp_callbacks_demo: post_unpatch_callback: vmlinux livepatch: 'test_klp_callbacks_demo': unpatching complete % rmmod test_klp_callbacks_demo" +start_test "sysfs test replace enabled" + +MOD_LIVEPATCH=test_klp_atomic_replace +load_lp $MOD_LIVEPATCH replace=1 + +check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "replace" "1" + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=1 +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + +start_test "sysfs test replace disabled" + +load_lp $MOD_LIVEPATCH replace=0 + +check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--" +check_sysfs_value "$MOD_LIVEPATCH" "replace" "0" + +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=0 +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + exit 0 diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 368973f05250..cff124c1eddd 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -31,6 +31,7 @@ SLAB_FREE_CROSS SLAB_FREE_PAGE #SOFTLOCKUP Hangs the system #HARDLOCKUP Hangs the system +#SMP_CALL_LOCKUP Hangs the system #SPINLOCKUP Hangs the system #HUNG_TASK Hangs the system EXEC_DATA diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 0b9ab987601c..da030b43e43b 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -6,6 +6,7 @@ hugepage-shm hugepage-vmemmap hugetlb-madvise hugetlb-read-hwpoison +hugetlb-soft-offline khugepaged map_hugetlb map_populate @@ -49,3 +50,4 @@ hugetlb_fault_after_madv hugetlb_madv_vs_map mseal_test seal_elf +droppable diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 3b49bc3d0a3b..901e0d07765b 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -2,6 +2,7 @@ # Makefile for mm selftests LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h +LOCAL_HDRS += $(selfdir)/mm/mseal_helpers.h include local_config.mk @@ -42,6 +43,7 @@ TEST_GEN_FILES += gup_test TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugetlb-madvise TEST_GEN_FILES += hugetlb-read-hwpoison +TEST_GEN_FILES += hugetlb-soft-offline TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-mremap TEST_GEN_FILES += hugepage-shm @@ -73,6 +75,8 @@ TEST_GEN_FILES += ksm_functional_tests TEST_GEN_FILES += mdwe_test TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map +TEST_GEN_FILES += hugetlb_dio +TEST_GEN_FILES += droppable ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c new file mode 100644 index 000000000000..f3d9ecf96890 --- /dev/null +++ b/tools/testing/selftests/mm/droppable.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <sys/mman.h> +#include <linux/mman.h> + +#include "../kselftest.h" + +int main(int argc, char *argv[]) +{ + size_t alloc_size = 134217728; + size_t page_size = getpagesize(); + void *alloc; + pid_t child; + + ksft_print_header(); + ksft_set_plan(1); + + alloc = mmap(0, alloc_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0); + assert(alloc != MAP_FAILED); + memset(alloc, 'A', alloc_size); + for (size_t i = 0; i < alloc_size; i += page_size) + assert(*(uint8_t *)(alloc + i)); + + child = fork(); + assert(child >= 0); + if (!child) { + for (;;) + *(char *)malloc(page_size) = 'B'; + } + + for (bool done = false; !done;) { + for (size_t i = 0; i < alloc_size; i += page_size) { + if (!*(uint8_t *)(alloc + i)) { + done = true; + break; + } + } + } + kill(child, SIGTERM); + + ksft_test_result_pass("MAP_DROPPABLE: PASS\n"); + exit(KSFT_PASS); +} diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index c463d1c09c9b..ada9156cc497 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <sys/mman.h> #include <errno.h> #include <fcntl.h> /* Definition of O_* constants */ diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c new file mode 100644 index 000000000000..f086f0e04756 --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test soft offline behavior for HugeTLB pages: + * - if enable_soft_offline = 0, hugepages should stay intact and soft + * offlining failed with EOPNOTSUPP. + * - if enable_soft_offline = 1, a hugepage should be dissolved and + * nr_hugepages/free_hugepages should be reduced by 1. + * + * Before running, make sure more than 2 hugepages of default_hugepagesz + * are allocated. For example, if /proc/meminfo/Hugepagesize is 2048kB: + * echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <linux/magic.h> +#include <linux/memfd.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <sys/types.h> + +#include "../kselftest.h" + +#ifndef MADV_SOFT_OFFLINE +#define MADV_SOFT_OFFLINE 101 +#endif + +#define EPREFIX " !!! " + +static int do_soft_offline(int fd, size_t len, int expect_errno) +{ + char *filemap = NULL; + char *hwp_addr = NULL; + const unsigned long pagesize = getpagesize(); + int ret = 0; + + if (ftruncate(fd, len) < 0) { + ksft_perror(EPREFIX "ftruncate to len failed"); + return -1; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + ksft_perror(EPREFIX "mmap failed"); + ret = -1; + goto untruncate; + } + + memset(filemap, 0xab, len); + ksft_print_msg("Allocated %#lx bytes of hugetlb pages\n", len); + + hwp_addr = filemap + len / 2; + ret = madvise(hwp_addr, pagesize, MADV_SOFT_OFFLINE); + ksft_print_msg("MADV_SOFT_OFFLINE %p ret=%d, errno=%d\n", + hwp_addr, ret, errno); + if (ret != 0) + ksft_perror(EPREFIX "madvise failed"); + + if (errno == expect_errno) + ret = 0; + else { + ksft_print_msg("MADV_SOFT_OFFLINE should ret %d\n", + expect_errno); + ret = -1; + } + + munmap(filemap, len); +untruncate: + if (ftruncate(fd, 0) < 0) + ksft_perror(EPREFIX "ftruncate back to 0 failed"); + + return ret; +} + +static int set_enable_soft_offline(int value) +{ + char cmd[256] = {0}; + FILE *cmdfile = NULL; + + if (value != 0 && value != 1) + return -EINVAL; + + sprintf(cmd, "echo %d > /proc/sys/vm/enable_soft_offline", value); + cmdfile = popen(cmd, "r"); + + if (cmdfile) + ksft_print_msg("enable_soft_offline => %d\n", value); + else { + ksft_perror(EPREFIX "failed to set enable_soft_offline"); + return errno; + } + + pclose(cmdfile); + return 0; +} + +static int read_nr_hugepages(unsigned long hugepage_size, + unsigned long *nr_hugepages) +{ + char buffer[256] = {0}; + char cmd[256] = {0}; + + sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages", + hugepage_size); + FILE *cmdfile = popen(cmd, "r"); + + if (cmdfile == NULL) { + ksft_perror(EPREFIX "failed to popen nr_hugepages"); + return -1; + } + + if (!fgets(buffer, sizeof(buffer), cmdfile)) { + ksft_perror(EPREFIX "failed to read nr_hugepages"); + pclose(cmdfile); + return -1; + } + + *nr_hugepages = atoll(buffer); + pclose(cmdfile); + return 0; +} + +static int create_hugetlbfs_file(struct statfs *file_stat) +{ + int fd; + + fd = memfd_create("hugetlb_tmp", MFD_HUGETLB); + if (fd < 0) { + ksft_perror(EPREFIX "could not open hugetlbfs file"); + return -1; + } + + memset(file_stat, 0, sizeof(*file_stat)); + if (fstatfs(fd, file_stat)) { + ksft_perror(EPREFIX "fstatfs failed"); + goto close; + } + if (file_stat->f_type != HUGETLBFS_MAGIC) { + ksft_print_msg(EPREFIX "not hugetlbfs file\n"); + goto close; + } + + return fd; +close: + close(fd); + return -1; +} + +static void test_soft_offline_common(int enable_soft_offline) +{ + int fd; + int expect_errno = enable_soft_offline ? 0 : EOPNOTSUPP; + struct statfs file_stat; + unsigned long hugepagesize_kb = 0; + unsigned long nr_hugepages_before = 0; + unsigned long nr_hugepages_after = 0; + int ret; + + ksft_print_msg("Test soft-offline when enabled_soft_offline=%d\n", + enable_soft_offline); + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + ksft_exit_fail_msg("Failed to create hugetlbfs file\n"); + + hugepagesize_kb = file_stat.f_bsize / 1024; + ksft_print_msg("Hugepagesize is %ldkB\n", hugepagesize_kb); + + if (set_enable_soft_offline(enable_soft_offline) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to set enable_soft_offline\n"); + } + + if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to read nr_hugepages\n"); + } + + ksft_print_msg("Before MADV_SOFT_OFFLINE nr_hugepages=%ld\n", + nr_hugepages_before); + + ret = do_soft_offline(fd, 2 * file_stat.f_bsize, expect_errno); + + if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) { + close(fd); + ksft_exit_fail_msg("Failed to read nr_hugepages\n"); + } + + ksft_print_msg("After MADV_SOFT_OFFLINE nr_hugepages=%ld\n", + nr_hugepages_after); + + // No need for the hugetlbfs file from now on. + close(fd); + + if (enable_soft_offline) { + if (nr_hugepages_before != nr_hugepages_after + 1) { + ksft_test_result_fail("MADV_SOFT_OFFLINE should reduced 1 hugepage\n"); + return; + } + } else { + if (nr_hugepages_before != nr_hugepages_after) { + ksft_test_result_fail("MADV_SOFT_OFFLINE reduced %lu hugepages\n", + nr_hugepages_before - nr_hugepages_after); + return; + } + } + + ksft_test_result(ret == 0, + "Test soft-offline when enabled_soft_offline=%d\n", + enable_soft_offline); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(2); + + test_soft_offline_common(1); + test_soft_offline_common(0); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c new file mode 100644 index 000000000000..f9ac20c657ec --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program tests for hugepage leaks after DIO writes to a file using a + * hugepage as the user buffer. During DIO, the user buffer is pinned and + * should be properly unpinned upon completion. This patch verifies that the + * kernel correctly unpins the buffer at DIO completion for both aligned and + * unaligned user buffer offsets (w.r.t page boundary), ensuring the hugepage + * is freed upon unmapping. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> +#include "vm_util.h" +#include "../kselftest.h" + +void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) +{ + int fd; + char *buffer = NULL; + char *orig_buffer = NULL; + size_t h_pagesize = 0; + size_t writesize; + int free_hpage_b = 0; + int free_hpage_a = 0; + const int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; + const int mmap_prot = PROT_READ | PROT_WRITE; + + writesize = end_off - start_off; + + /* Get the default huge page size */ + h_pagesize = default_huge_page_size(); + if (!h_pagesize) + ksft_exit_fail_msg("Unable to determine huge page size\n"); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_fail_perror("Error opening file\n"); + + /* Get the free huge pages before allocation */ + free_hpage_b = get_free_hugepages(); + if (free_hpage_b == 0) { + close(fd); + ksft_exit_skip("No free hugepage, exiting!\n"); + } + + /* Allocate a hugetlb page */ + orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0); + if (orig_buffer == MAP_FAILED) { + close(fd); + ksft_exit_fail_perror("Error mapping memory\n"); + } + buffer = orig_buffer; + buffer += start_off; + + memset(buffer, 'A', writesize); + + /* Write the buffer to the file */ + if (write(fd, buffer, writesize) != (writesize)) { + munmap(orig_buffer, h_pagesize); + close(fd); + ksft_exit_fail_perror("Error writing to file\n"); + } + + /* unmap the huge page */ + munmap(orig_buffer, h_pagesize); + close(fd); + + /* Get the free huge pages after unmap*/ + free_hpage_a = get_free_hugepages(); + + /* + * If the no. of free hugepages before allocation and after unmap does + * not match - that means there could still be a page which is pinned. + */ + if (free_hpage_a != free_hpage_b) { + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + ksft_test_result_fail(": Huge pages not freed!\n"); + } else { + ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b); + ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a); + ksft_test_result_pass(": Huge pages freed successfully !\n"); + } +} + +int main(void) +{ + size_t pagesize = 0; + + ksft_print_header(); + ksft_set_plan(4); + + /* Get base page size */ + pagesize = psize(); + + /* start and end is aligned to pagesize */ + run_dio_using_hugetlb(0, (pagesize * 3)); + + /* start is aligned but end is not aligned */ + run_dio_using_hugetlb(0, (pagesize * 3) - (pagesize / 2)); + + /* start is unaligned and end is aligned */ + run_dio_using_hugetlb(pagesize / 2, (pagesize * 3)); + + /* both start and end are unaligned */ + run_dio_using_hugetlb(pagesize / 2, (pagesize * 3) + (pagesize / 2)); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 37de82da9be7..66b4e111b5a2 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include <string.h> #include <stdbool.h> #include <stdint.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <errno.h> #include <fcntl.h> #include <sys/mman.h> @@ -369,7 +369,6 @@ unmap: munmap(map, size); } -#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -430,7 +429,6 @@ close_uffd: unmap: munmap(map, size); } -#endif /* Verify that KSM can be enabled / queried with prctl. */ static void test_prctl(void) @@ -656,24 +654,8 @@ unmap: munmap(map, size); } -int main(int argc, char **argv) +static void init_global_file_handles(void) { - unsigned int tests = 8; - int err; - - if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { - exit(test_child_ksm()); - } - -#ifdef __NR_userfaultfd - tests++; -#endif - - ksft_print_header(); - ksft_set_plan(tests); - - pagesize = getpagesize(); - mem_fd = open("/proc/self/mem", O_RDWR); if (mem_fd < 0) ksft_exit_fail_msg("opening /proc/self/mem failed\n"); @@ -688,15 +670,33 @@ int main(int argc, char **argv) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); + O_RDONLY); ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); +} + +int main(int argc, char **argv) +{ + unsigned int tests = 8; + int err; + + if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { + init_global_file_handles(); + exit(test_child_ksm()); + } + + tests++; + + ksft_print_header(); + ksft_set_plan(tests); + + pagesize = getpagesize(); + + init_global_file_handles(); test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); -#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); -#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index b74813fdc951..d53de2486080 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -67,7 +67,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: munmap failed!?\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); addr = base_addr + page_size; size = 3 * page_size; @@ -76,7 +77,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Exact same mapping again: @@ -93,7 +95,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: @@ -111,7 +114,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Overlap end of existing mapping: @@ -128,7 +132,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n"); /* * Overlap start of existing mapping: @@ -145,7 +150,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n"); /* * Adjacent to start of existing mapping: @@ -162,7 +168,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base\n"); /* * Adjacent to end of existing mapping: @@ -179,7 +186,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n"); addr = base_addr; size = 5 * page_size; diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 9a0597310a76..74c911aa3aea 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <string.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <errno.h> #include <stdio.h> #include <fcntl.h> @@ -28,8 +28,6 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) -#ifdef __NR_memfd_secret - #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -334,13 +332,3 @@ int main(int argc, char *argv[]) ksft_finished(); } - -#else /* __NR_memfd_secret */ - -int main(int argc, char *argv[]) -{ - printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); - return KSFT_SKIP; -} - -#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index b8a7efe9204e..1db134063c38 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include <fcntl.h> #include <signal.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <string.h> #include <errno.h> #include <stdlib.h> @@ -265,7 +265,6 @@ munmap: munmap(mmap_mem, mmap_size); } -#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -322,7 +321,6 @@ munmap: munmap(dst, pagesize); free(src); } -#endif /* __NR_userfaultfd */ int main(void) { @@ -335,9 +333,7 @@ int main(void) thpsize / 1024); tests += 3; } -#ifdef __NR_userfaultfd tests += 1; -#endif /* __NR_userfaultfd */ ksft_print_header(); ksft_set_plan(tests); @@ -367,9 +363,7 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. */ -#ifdef __NR_userfaultfd test_uffdio_copy(); -#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..1e5731bab499 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,6 +3,7 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> +#include <asm-generic/unistd.h> static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1b03bcfaefdf..5a3a9bcba640 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -22,8 +22,10 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) diff --git a/tools/testing/selftests/mm/mseal_helpers.h b/tools/testing/selftests/mm/mseal_helpers.h new file mode 100644 index 000000000000..0cfce31c76d2 --- /dev/null +++ b/tools/testing/selftests/mm/mseal_helpers.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define FAIL_TEST_IF_FALSE(test_passed) \ + do { \ + if (!(test_passed)) { \ + ksft_test_result_fail("%s: line:%d\n", \ + __func__, __LINE__); \ + return; \ + } \ + } while (0) + +#define SKIP_TEST_IF_FALSE(test_passed) \ + do { \ + if (!(test_passed)) { \ + ksft_test_result_skip("%s: line:%d\n", \ + __func__, __LINE__); \ + return; \ + } \ + } while (0) + +#define REPORT_TEST_PASS() ksft_test_result_pass("%s\n", __func__) + +#ifndef PKEY_DISABLE_ACCESS +#define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +#define PKEY_DISABLE_WRITE 0x2 +#endif + +#ifndef PKEY_BITS_PER_PKEY +#define PKEY_BITS_PER_PKEY 2 +#endif + +#ifndef PKEY_MASK +#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif + +#ifndef u64 +#define u64 unsigned long long +#endif diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index 41998cf1dcf5..a818f010de47 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -3,7 +3,7 @@ #include <linux/mman.h> #include <sys/mman.h> #include <stdint.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <string.h> #include <sys/time.h> #include <sys/resource.h> @@ -17,54 +17,7 @@ #include <sys/ioctl.h> #include <sys/vfs.h> #include <sys/stat.h> - -/* - * need those definition for manually build using gcc. - * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 mseal_test.c -o mseal_test - */ -#ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -#endif - -#ifndef PKEY_DISABLE_WRITE -# define PKEY_DISABLE_WRITE 0x2 -#endif - -#ifndef PKEY_BITS_PER_PKEY -#define PKEY_BITS_PER_PKEY 2 -#endif - -#ifndef PKEY_MASK -#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) -#endif - -#define FAIL_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - -#define SKIP_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - - -#define TEST_END_CHECK() {\ - ksft_test_result_pass("%s\n", __func__);\ - return;\ -test_end:\ - return;\ -} - -#ifndef u64 -#define u64 unsigned long long -#endif +#include "mseal_helpers.h" static unsigned long get_vma_size(void *addr, int *prot) { @@ -287,7 +240,7 @@ static void test_seal_addseal(void) ret = sys_mseal(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_unmapped_start(void) @@ -315,7 +268,7 @@ static void test_seal_unmapped_start(void) ret = sys_mseal(ptr + 2 * page_size, 2 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_unmapped_middle(void) @@ -347,7 +300,7 @@ static void test_seal_unmapped_middle(void) ret = sys_mseal(ptr + 3 * page_size, page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_unmapped_end(void) @@ -376,7 +329,7 @@ static void test_seal_unmapped_end(void) ret = sys_mseal(ptr, 2 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_multiple_vmas(void) @@ -407,7 +360,7 @@ static void test_seal_multiple_vmas(void) ret = sys_mseal(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_split_start(void) @@ -432,7 +385,7 @@ static void test_seal_split_start(void) ret = sys_mseal(ptr + page_size, 3 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_split_end(void) @@ -457,7 +410,7 @@ static void test_seal_split_end(void) ret = sys_mseal(ptr, 3 * page_size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_invalid_input(void) @@ -492,7 +445,7 @@ static void test_seal_invalid_input(void) ret = sys_mseal(ptr - page_size, 5 * page_size); FAIL_TEST_IF_FALSE(ret < 0); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_zero_length(void) @@ -516,7 +469,7 @@ static void test_seal_zero_length(void) ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_zero_address(void) @@ -542,7 +495,7 @@ static void test_seal_zero_address(void) ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_twice(void) @@ -562,7 +515,7 @@ static void test_seal_twice(void) ret = sys_mseal(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect(bool seal) @@ -586,7 +539,7 @@ static void test_seal_mprotect(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_start_mprotect(bool seal) @@ -616,7 +569,7 @@ static void test_seal_start_mprotect(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_end_mprotect(bool seal) @@ -646,7 +599,7 @@ static void test_seal_end_mprotect(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_unalign_len(bool seal) @@ -675,7 +628,7 @@ static void test_seal_mprotect_unalign_len(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_unalign_len_variant_2(bool seal) @@ -703,7 +656,7 @@ static void test_seal_mprotect_unalign_len_variant_2(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_two_vma(bool seal) @@ -738,7 +691,7 @@ static void test_seal_mprotect_two_vma(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_two_vma_with_split(bool seal) @@ -785,7 +738,7 @@ static void test_seal_mprotect_two_vma_with_split(bool seal) PROT_READ | PROT_WRITE); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_partial_mprotect(bool seal) @@ -811,7 +764,7 @@ static void test_seal_mprotect_partial_mprotect(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_two_vma_with_gap(bool seal) @@ -854,7 +807,7 @@ static void test_seal_mprotect_two_vma_with_gap(bool seal) ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ); FAIL_TEST_IF_FALSE(ret == 0); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_split(bool seal) @@ -891,7 +844,7 @@ static void test_seal_mprotect_split(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mprotect_merge(bool seal) @@ -925,7 +878,7 @@ static void test_seal_mprotect_merge(bool seal) ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ); FAIL_TEST_IF_FALSE(ret == 0); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_munmap(bool seal) @@ -950,7 +903,7 @@ static void test_seal_munmap(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } /* @@ -990,7 +943,7 @@ static void test_seal_munmap_two_vma(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } /* @@ -1028,7 +981,7 @@ static void test_seal_munmap_vma_with_gap(bool seal) ret = sys_munmap(ptr, size); FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_munmap_start_freed(bool seal) @@ -1068,7 +1021,7 @@ static void test_munmap_start_freed(bool seal) FAIL_TEST_IF_FALSE(size == 0); } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_munmap_end_freed(bool seal) @@ -1098,7 +1051,7 @@ static void test_munmap_end_freed(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_munmap_middle_freed(bool seal) @@ -1142,7 +1095,7 @@ static void test_munmap_middle_freed(bool seal) FAIL_TEST_IF_FALSE(size == 0); } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_shrink(bool seal) @@ -1171,7 +1124,7 @@ static void test_seal_mremap_shrink(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_expand(bool seal) @@ -1203,7 +1156,7 @@ static void test_seal_mremap_expand(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move(bool seal) @@ -1236,7 +1189,7 @@ static void test_seal_mremap_move(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mmap_overwrite_prot(bool seal) @@ -1264,7 +1217,7 @@ static void test_seal_mmap_overwrite_prot(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == ptr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mmap_expand(bool seal) @@ -1295,7 +1248,7 @@ static void test_seal_mmap_expand(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == ptr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mmap_shrink(bool seal) @@ -1323,7 +1276,7 @@ static void test_seal_mmap_shrink(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == ptr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_shrink_fixed(bool seal) @@ -1354,7 +1307,7 @@ static void test_seal_mremap_shrink_fixed(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == newAddr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_expand_fixed(bool seal) @@ -1385,7 +1338,7 @@ static void test_seal_mremap_expand_fixed(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == newAddr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_fixed(bool seal) @@ -1415,7 +1368,7 @@ static void test_seal_mremap_move_fixed(bool seal) } else FAIL_TEST_IF_FALSE(ret2 == newAddr); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_fixed_zero(bool seal) @@ -1447,7 +1400,7 @@ static void test_seal_mremap_move_fixed_zero(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_dontunmap(bool seal) @@ -1476,7 +1429,7 @@ static void test_seal_mremap_move_dontunmap(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) @@ -1510,7 +1463,7 @@ static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) } - TEST_END_CHECK(); + REPORT_TEST_PASS(); } @@ -1603,7 +1556,7 @@ static void test_seal_merge_and_split(void) FAIL_TEST_IF_FALSE(size == 22 * page_size); FAIL_TEST_IF_FALSE(prot == 0x4); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_rw(bool seal) @@ -1632,7 +1585,7 @@ static void test_seal_discard_ro_anon_on_rw(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_pkey(bool seal) @@ -1679,7 +1632,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_filebacked(bool seal) @@ -1716,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal) FAIL_TEST_IF_FALSE(!ret); close(fd); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon_on_shared(bool seal) @@ -1745,7 +1698,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } static void test_seal_discard_ro_anon(bool seal) @@ -1775,7 +1728,7 @@ static void test_seal_discard_ro_anon(bool seal) else FAIL_TEST_IF_FALSE(!ret); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index 2d785aca72a5..fc90af2a97b8 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -15,7 +15,7 @@ #include <sys/ioctl.h> #include <sys/stat.h> #include <math.h> -#include <asm/unistd.h> +#include <asm-generic/unistd.h> #include <pthread.h> #include <sys/resource.h> #include <assert.h> @@ -1567,8 +1567,10 @@ int main(int argc, char *argv[]) /* 7. File Hugetlb testing */ mem_size = 2*1024*1024; fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL); + if (fd < 0) + ksft_exit_fail_msg("uffd-test creation failed %d %s\n", errno, strerror(errno)); mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (mem) { + if (mem != MAP_FAILED) { wp_init(mem, mem_size); wp_addr_range(mem, mem_size); diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 48dc151f8fca..eaa6d1fc5328 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <sys/ptrace.h> #include <setjmp.h> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 3157204b9047..03ac4f2e1cce 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -265,6 +265,7 @@ CATEGORY="hugetlb" run_test ./map_hugetlb CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +CATEGORY="hugetlb" run_test ./hugetlb_dio nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) # For this test, we need one and just one huge page @@ -331,6 +332,12 @@ CATEGORY="hugetlb" run_test ./thuge-gen CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2 CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2 if $RUN_DESTRUCTIVE; then +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) +enable_soft_offline=$(cat /proc/sys/vm/enable_soft_offline) +echo 8 > /proc/sys/vm/nr_hugepages +CATEGORY="hugetlb" run_test ./hugetlb-soft-offline +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +echo "$enable_soft_offline" > /proc/sys/vm/enable_soft_offline CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison fi diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c index f2babec79bb6..7aa1366063e4 100644 --- a/tools/testing/selftests/mm/seal_elf.c +++ b/tools/testing/selftests/mm/seal_elf.c @@ -2,7 +2,7 @@ #define _GNU_SOURCE #include <sys/mman.h> #include <stdint.h> -#include <unistd.h> +#include <asm-generic/unistd.h> #include <string.h> #include <sys/time.h> #include <sys/resource.h> @@ -16,38 +16,7 @@ #include <sys/ioctl.h> #include <sys/vfs.h> #include <sys/stat.h> - -/* - * need those definition for manually build using gcc. - * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 seal_elf.c -o seal_elf - */ -#define FAIL_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - -#define SKIP_TEST_IF_FALSE(c) do {\ - if (!(c)) {\ - ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\ - goto test_end;\ - } \ - } \ - while (0) - - -#define TEST_END_CHECK() {\ - ksft_test_result_pass("%s\n", __func__);\ - return;\ -test_end:\ - return;\ -} - -#ifndef u64 -#define u64 unsigned long long -#endif +#include "mseal_helpers.h" /* * define sys_xyx to call syscall directly. @@ -158,7 +127,7 @@ static void test_seal_elf(void) FAIL_TEST_IF_FALSE(ret < 0); ksft_print_msg("somestr is sealed, mprotect is rejected\n"); - TEST_END_CHECK(); + REPORT_TEST_PASS(); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index d3c7f5fb3e7b..e5e8dafc9d94 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr) { size_t i; - int __attribute__((unused)) dummy = 0; + int dummy = 0; srand(time(NULL)); @@ -341,6 +341,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, for (size_t i = 0; i < fd_size; i++) dummy += *(*addr + i); + asm volatile("" : "+r" (dummy)); if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) { ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n"); diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index ea7fd8fe2876..e4370b79b62f 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -13,8 +13,9 @@ sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m (warning this will remove all if someone else uses them) */ -#define _GNU_SOURCE 1 +#define _GNU_SOURCE #include <sys/mman.h> +#include <linux/mman.h> #include <stdlib.h> #include <stdio.h> #include <sys/ipc.h> @@ -28,19 +29,23 @@ #include "vm_util.h" #include "../kselftest.h" -#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f #if !defined(MAP_HUGETLB) #define MAP_HUGETLB 0x40000 #endif #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#ifndef SHM_HUGE_SHIFT #define SHM_HUGE_SHIFT 26 +#endif +#ifndef SHM_HUGE_MASK #define SHM_HUGE_MASK 0x3f +#endif +#ifndef SHM_HUGE_2MB #define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#endif +#ifndef SHM_HUGE_1GB #define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) +#endif #define NUM_PAGESIZES 5 #define NUM_PAGES 4 diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 7ad6ba660c7d..717539eddf98 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,11 +673,7 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { -#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); -#else - return -1; -#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index f78bab0f3d45..a4b83280998a 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,10 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). */ - +#include <asm-generic/unistd.h> #include "uffd-common.h" -#ifdef __NR_userfaultfd +uint64_t features; #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -247,10 +247,14 @@ static int userfaultfd_stress(void) unsigned long nr; struct uffd_args args[nr_cpus]; uint64_t mem_size = nr_pages * page_size; + int flags = 0; memset(args, 0, sizeof(struct uffd_args) * nr_cpus); - if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL)) + if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) + flags = UFFD_FEATURE_WP_UNPOPULATED; + + if (uffd_test_ctx_init(flags, NULL)) err("context init failed"); if (posix_memalign(&area, page_size, page_size)) @@ -385,8 +389,6 @@ static void set_test_type(const char *type) static void parse_test_type_arg(const char *raw_type) { - uint64_t features = UFFD_API_FEATURES; - set_test_type(raw_type); if (!test_type) @@ -409,12 +411,15 @@ static void parse_test_type_arg(const char *raw_type) * feature. */ - if (userfaultfd_open(&features)) - err("Userfaultfd open failed"); + if (uffd_get_features(&features)) + err("failed to get available features"); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); + if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + test_uffdio_wp = false; + close(uffd); uffd = -1; } @@ -466,15 +471,3 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } - -#else /* __NR_userfaultfd */ - -#warning "missing __NR_userfaultfd definition" - -int main(void) -{ - printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return KSFT_SKIP; -} - -#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 21ec23206ab4..b3d21eed203d 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,12 +5,11 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ +#include <asm-generic/unistd.h> #include "uffd-common.h" #include "../../../../mm/gup_test.h" -#ifdef __NR_userfaultfd - /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1554,14 +1553,3 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } -#else /* __NR_userfaultfd */ - -#warning "missing __NR_userfaultfd definition" - -int main(void) -{ - printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); - return KSFT_SKIP; -} - -#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index cfbc501290d3..896b3f73fc53 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -9,26 +9,9 @@ #include <sys/mman.h> #include <string.h> +#include "vm_util.h" #include "../kselftest.h" -#ifdef __powerpc64__ -#define PAGE_SIZE (64 << 10) -/* - * This will work with 16M and 2M hugepage size - */ -#define HUGETLB_SIZE (16 << 20) -#elif __aarch64__ -/* - * The default hugepage size for 64k base pagesize - * is 512MB. - */ -#define PAGE_SIZE (64 << 10) -#define HUGETLB_SIZE (512 << 20) -#else -#define PAGE_SIZE (4 << 10) -#define HUGETLB_SIZE (2 << 20) -#endif - /* * The hint addr value is used to allocate addresses * beyond the high address switch boundary. @@ -37,18 +20,8 @@ #define ADDR_MARK_128TB (1UL << 47) #define ADDR_MARK_256TB (1UL << 48) -#define HIGH_ADDR_128TB ((void *) (1UL << 48)) -#define HIGH_ADDR_256TB ((void *) (1UL << 49)) - -#define LOW_ADDR ((void *) (1UL << 30)) - -#ifdef __aarch64__ -#define ADDR_SWITCH_HINT ADDR_MARK_256TB -#define HIGH_ADDR HIGH_ADDR_256TB -#else -#define ADDR_SWITCH_HINT ADDR_MARK_128TB -#define HIGH_ADDR HIGH_ADDR_128TB -#endif +#define HIGH_ADDR_128TB (1UL << 48) +#define HIGH_ADDR_256TB (1UL << 49) struct testcase { void *addr; @@ -59,195 +32,230 @@ struct testcase { unsigned int keep_mapped:1; }; -static struct testcase testcases[] = { - { - /* - * If stack is moved, we could possibly allocate - * this at the requested address. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - /* - * Unless MAP_FIXED is specified, allocation based on hint - * addr is never at requested address or above it, which is - * beyond high address switch boundary in this case. Instead, - * a suitable allocation is found in lower address space. - */ - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))", - .low_addr_required = 1, - }, - { - /* - * Exact mapping at high address switch boundary, should - * be obtained even without MAP_FIXED as area is free. - */ - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, - { - .addr = NULL, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED)", - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1) again", - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)", - .low_addr_required = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = ((void *)(ADDR_SWITCH_HINT)), - .size = PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * PAGE_SIZE, - .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)", - }, -}; +static struct testcase *testcases; +static struct testcase *hugetlb_testcases; +static int sz_testcases, sz_hugetlb_testcases; +static unsigned long switch_hint; -static struct testcase hugetlb_testcases[] = { - { - .addr = NULL, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(NULL, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = LOW_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(LOW_ADDR, MAP_HUGETLB)", - .low_addr_required = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again", - .keep_mapped = 1, - }, - { - .addr = HIGH_ADDR, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)", - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB)", - .keep_mapped = 1, - }, - { - .addr = (void *) -1, - .size = HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(-1, MAP_HUGETLB) again", - }, - { - .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, - .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)", - .low_addr_required = 1, - .keep_mapped = 1, - }, - { - .addr = (void *)(ADDR_SWITCH_HINT), - .size = 2 * HUGETLB_SIZE, - .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)", - }, -}; +/* Initialize testcases inside a function to compute parameters at runtime */ +void testcases_init(void) +{ + unsigned long pagesize = getpagesize(); + unsigned long hugepagesize = default_huge_page_size(); + unsigned long low_addr = (1UL << 30); + unsigned long addr_switch_hint = ADDR_MARK_128TB; + unsigned long high_addr = HIGH_ADDR_128TB; + +#ifdef __aarch64__ + + /* Post LPA2, the lower userspace VA on a 16K pagesize is 47 bits. */ + if (pagesize != (16UL << 10)) { + addr_switch_hint = ADDR_MARK_256TB; + high_addr = HIGH_ADDR_256TB; + } +#endif + + struct testcase t[] = { + { + /* + * If stack is moved, we could possibly allocate + * this at the requested address. + */ + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, pagesize)", + .low_addr_required = 1, + }, + { + /* + * Unless MAP_FIXED is specified, allocation based on hint + * addr is never at requested address or above it, which is + * beyond high address switch boundary in this case. Instead, + * a suitable allocation is found in lower address space. + */ + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, (2 * pagesize))", + .low_addr_required = 1, + }, + { + /* + * Exact mapping at high address switch boundary, should + * be obtained even without MAP_FIXED as area is free. + */ + .addr = ((void *)(addr_switch_hint)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint, pagesize)", + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)", + }, + { + .addr = NULL, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL)", + .low_addr_required = 1, + }, + { + .addr = (void *)low_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(low_addr)", + .low_addr_required = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr)", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr) again", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(high_addr, MAP_FIXED)", + }, + { + .addr = (void *) -1, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1) again", + }, + { + .addr = ((void *)(addr_switch_hint - pagesize)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, pagesize)", + .low_addr_required = 1, + }, + { + .addr = (void *)(addr_switch_hint - pagesize), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, 2 * pagesize)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint - pagesize / 2), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize/2 , 2 * pagesize)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = ((void *)(addr_switch_hint)), + .size = pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint, pagesize)", + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * pagesize, + .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)", + }, + }; + + struct testcase ht[] = { + { + .addr = NULL, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(NULL, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = (void *)low_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(low_addr, MAP_HUGETLB)", + .low_addr_required = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(high_addr, MAP_HUGETLB) again", + .keep_mapped = 1, + }, + { + .addr = (void *)high_addr, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(high_addr, MAP_FIXED | MAP_HUGETLB)", + }, + { + .addr = (void *) -1, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB)", + .keep_mapped = 1, + }, + { + .addr = (void *) -1, + .size = hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(-1, MAP_HUGETLB) again", + }, + { + .addr = (void *)(addr_switch_hint - pagesize), + .size = 2 * hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS, + .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)", + .low_addr_required = 1, + .keep_mapped = 1, + }, + { + .addr = (void *)(addr_switch_hint), + .size = 2 * hugepagesize, + .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + .msg = "mmap(addr_switch_hint , 2*hugepagesize, MAP_FIXED | MAP_HUGETLB)", + }, + }; + + testcases = malloc(sizeof(t)); + hugetlb_testcases = malloc(sizeof(ht)); + + /* Copy into global arrays */ + memcpy(testcases, t, sizeof(t)); + memcpy(hugetlb_testcases, ht, sizeof(ht)); + + sz_testcases = ARRAY_SIZE(t); + sz_hugetlb_testcases = ARRAY_SIZE(ht); + switch_hint = addr_switch_hint; +} static int run_test(struct testcase *test, int count) { @@ -267,7 +275,7 @@ static int run_test(struct testcase *test, int count) continue; } - if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { + if (t->low_addr_required && p >= (void *)(switch_hint)) { printf("FAILED\n"); ret = KSFT_FAIL; } else { @@ -285,6 +293,20 @@ static int run_test(struct testcase *test, int count) return ret; } +#ifdef __aarch64__ +/* Check if userspace VA > 48 bits */ +static int high_address_present(void) +{ + void *ptr = mmap((void *)(1UL << 50), 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (ptr == MAP_FAILED) + return 0; + + munmap(ptr, 1); + return 1; +} +#endif + static int supported_arch(void) { #if defined(__powerpc64__) @@ -292,7 +314,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return getpagesize() == PAGE_SIZE; + return high_address_present(); #else return 0; #endif @@ -305,8 +327,10 @@ int main(int argc, char **argv) if (!supported_arch()) return KSFT_SKIP; - ret = run_test(testcases, ARRAY_SIZE(testcases)); + testcases_init(); + + ret = run_test(testcases, sz_testcases); if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) - ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases)); + ret = run_test(hugetlb_testcases, sz_hugetlb_testcases); return ret; } diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index a0a75f302904..2c725773cd79 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -57,8 +57,4 @@ check_test_requirements() } check_test_requirements -./va_high_addr_switch - -# In order to run hugetlb testcases, "--run-hugetlb" must be appended -# to the binary. ./va_high_addr_switch --run-hugetlb diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 5c16159d0bcd..fb898850867c 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -323,7 +323,8 @@ void *fake_cont_thread(void *arg) void *cont_thread(void *arg) { char buff[MSG_SIZE]; - int i, priority; + int i; + unsigned int priority; for (i = 0; i < num_cpus_to_pin; i++) if (cpu_threads[i] == pthread_self()) @@ -425,7 +426,8 @@ struct test test2[] = { void *perf_test_thread(void *arg) { char buff[MSG_SIZE]; - int prio_out, prio_in; + int prio_out; + unsigned int prio_in; int i; clockid_t clock; pthread_t *t; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 49a56eb5d036..666ab7d9390b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -43,7 +43,6 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap -test_unix_oob timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..8eaffd7a641c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../ @@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += srv6_end_dx4_netfilter_test.sh +TEST_PROGS += srv6_end_dx6_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh @@ -53,6 +55,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 3b83c797650d..50584479540b 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights +TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config new file mode 100644 index 000000000000..37368567768c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/config @@ -0,0 +1,3 @@ +CONFIG_UNIX=y +CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c new file mode 100644 index 000000000000..16d0c172eaeb --- /dev/null +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/signalfd.h> +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +#define BUF_SZ 32 + +FIXTURE(msg_oob) +{ + int fd[4]; /* 0: AF_UNIX sender + * 1: AF_UNIX receiver + * 2: TCP sender + * 3: TCP receiver + */ + int signal_fd; + int epoll_fd[2]; /* 0: AF_UNIX receiver + * 1: TCP receiver + */ + bool tcp_compliant; +}; + +FIXTURE_VARIANT(msg_oob) +{ + bool peek; +}; + +FIXTURE_VARIANT_ADD(msg_oob, no_peek) +{ + .peek = false, +}; + +FIXTURE_VARIANT_ADD(msg_oob, peek) +{ + .peek = true +}; + +static void create_unix_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int ret; + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(ret, 0); +} + +static void create_tcp_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct sockaddr_in addr; + socklen_t addrlen; + int listen_fd; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(listen_fd, 0); + + ret = listen(listen_fd, -1); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(ret, 0); + + self->fd[2] = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd[2], 0); + + ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(ret, 0); + + self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GE(self->fd[3], 0); + + ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK); + ASSERT_EQ(ret, 0); +} + +static void setup_sigurg(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct signalfd_siginfo siginfo; + int pid = getpid(); + sigset_t mask; + int i, ret; + + for (i = 0; i < 2; i++) { + ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid); + ASSERT_EQ(ret, 0); + } + + ret = sigemptyset(&mask); + ASSERT_EQ(ret, 0); + + ret = sigaddset(&mask, SIGURG); + ASSERT_EQ(ret, 0); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + ASSERT_EQ(ret, 0); + + self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK); + ASSERT_GE(self->signal_fd, 0); + + ret = read(self->signal_fd, &siginfo, sizeof(siginfo)); + ASSERT_EQ(ret, -1); +} + +static void setup_epollpri(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct epoll_event event = { + .events = EPOLLPRI, + }; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + self->epoll_fd[i] = epoll_create1(0); + ASSERT_GE(self->epoll_fd[i], 0); + + ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event); + ASSERT_EQ(ret, 0); + } +} + +static void close_sockets(FIXTURE_DATA(msg_oob) *self) +{ + int i; + + for (i = 0; i < 4; i++) + close(self->fd[i]); +} + +FIXTURE_SETUP(msg_oob) +{ + create_unix_socketpair(_metadata, self); + create_tcp_socketpair(_metadata, self); + + setup_sigurg(_metadata, self); + setup_epollpri(_metadata, self); + + self->tcp_compliant = true; +} + +FIXTURE_TEARDOWN(msg_oob) +{ + close_sockets(self); +} + +static void __epollpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_remaining) +{ + struct epoll_event event[2] = {}; + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0); + + ASSERT_EQ(ret[0], oob_remaining); + + if (self->tcp_compliant) + ASSERT_EQ(ret[0], ret[1]); + + if (oob_remaining) { + ASSERT_EQ(event[0].events, EPOLLPRI); + + if (self->tcp_compliant) + ASSERT_EQ(event[0].events, event[1].events); + } +} + +static void __sendpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *buf, size_t len, int flags) +{ + int i, ret[2]; + + for (i = 0; i < 2; i++) { + struct signalfd_siginfo siginfo = {}; + int bytes; + + ret[i] = send(self->fd[i * 2], buf, len, flags); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + + if (flags & MSG_OOB) { + ASSERT_EQ(bytes, sizeof(siginfo)); + ASSERT_EQ(siginfo.ssi_signo, SIGURG); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + } + + ASSERT_EQ(bytes, -1); + } + + ASSERT_EQ(ret[0], len); + ASSERT_EQ(ret[0], ret[1]); +} + +static void __recvpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *expected_buf, int expected_len, + int buf_len, int flags) +{ + int i, ret[2], recv_errno[2], expected_errno = 0; + char recv_buf[2][BUF_SZ] = {}; + bool printed = false; + + ASSERT_GE(BUF_SZ, buf_len); + + errno = 0; + + for (i = 0; i < 2; i++) { + ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + recv_errno[i] = errno; + } + + if (expected_len < 0) { + expected_errno = -expected_len; + expected_len = -1; + } + + if (ret[0] != expected_len || recv_errno[0] != expected_errno) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(ret[0], expected_len); + ASSERT_EQ(recv_errno[0], expected_errno); + } + + if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + printed = true; + + if (self->tcp_compliant) { + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } + } + + if (expected_len >= 0) { + int cmp; + + cmp = strncmp(expected_buf, recv_buf[0], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(cmp, 0); + } + + cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); + if (cmp) { + if (!printed) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + } + + if (self->tcp_compliant) + ASSERT_EQ(cmp, 0); + } + } +} + +static void __setinlinepair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int i, oob_inline = 1; + + for (i = 0; i < 2; i++) { + int ret; + + ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE, + &oob_inline, sizeof(oob_inline)); + ASSERT_EQ(ret, 0); + } +} + +static void __siocatmarkpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_head) +{ + int answ[2] = {}; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]); + ASSERT_EQ(ret, 0); + } + + ASSERT_EQ(answ[0], oob_head); + + if (self->tcp_compliant) + ASSERT_EQ(answ[0], answ[1]); +} + +#define sendpair(buf, len, flags) \ + __sendpair(_metadata, self, buf, len, flags) + +#define recvpair(expected_buf, expected_len, buf_len, flags) \ + do { \ + if (variant->peek) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, \ + buf_len, (flags) | MSG_PEEK); \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags); \ + } while (0) + +#define epollpair(oob_remaining) \ + __epollpair(_metadata, self, oob_remaining) + +#define siocatmarkpair(oob_head) \ + __siocatmarkpair(_metadata, self, oob_head) + +#define setinlinepair() \ + __setinlinepair(_metadata, self) + +#define tcp_incompliant \ + for (self->tcp_compliant = false; \ + self->tcp_compliant == false; \ + self->tcp_compliant = true) + +TEST_F(msg_oob, non_oob) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + epollpair(false); + siocatmarkpair(true); + + recvpair("world", 5, 5, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_break_drop) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("ld", 2, 2, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_drop_2) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + } + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("ld", 2, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + tcp_incompliant { + recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ + } + + epollpair(true); + siocatmarkpair(true); + + recvpair("d", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob) +{ + setinlinepair(); + + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_break) +{ + setinlinepair(); + + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ + } + + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("rld", 3, 3, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_no_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + setinlinepair(); + + sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + epollpair(true); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + setinlinepair(); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(false); + } +} + +TEST_F(msg_oob, inline_ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 2bfed46e0b19..d66336256580 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -14,12 +14,12 @@ FIXTURE(scm_rights) { - int fd[16]; + int fd[32]; }; FIXTURE_VARIANT(scm_rights) { - char name[16]; + char name[32]; int type; int flags; bool test_listener; @@ -172,6 +172,8 @@ static void __create_sockets(struct __test_metadata *_metadata, const FIXTURE_VARIANT(scm_rights) *variant, int n) { + ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); + if (variant->test_listener) create_listeners(_metadata, self, n); else @@ -283,4 +285,23 @@ TEST_F(scm_rights, cross_edge) close_sockets(8); } +TEST_F(scm_rights, backtrack_from_scc) +{ + create_sockets(10); + + send_fd(0, 1); + send_fd(0, 4); + send_fd(1, 2); + send_fd(2, 3); + send_fd(3, 1); + + send_fd(5, 6); + send_fd(5, 9); + send_fd(6, 7); + send_fd(7, 8); + send_fd(8, 6); + + close_sockets(10); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c deleted file mode 100644 index a7c51889acd5..000000000000 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ /dev/null @@ -1,436 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -#include <stdio.h> -#include <stdlib.h> -#include <sys/socket.h> -#include <arpa/inet.h> -#include <unistd.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <netinet/tcp.h> -#include <sys/un.h> -#include <sys/signal.h> -#include <sys/poll.h> - -static int pipefd[2]; -static int signal_recvd; -static pid_t producer_id; -static char sock_name[32]; - -static void sig_hand(int sn, siginfo_t *si, void *p) -{ - signal_recvd = sn; -} - -static int set_sig_handler(int signal) -{ - struct sigaction sa; - - sa.sa_sigaction = sig_hand; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - - return sigaction(signal, &sa, NULL); -} - -static void set_filemode(int fd, int set) -{ - int flags = fcntl(fd, F_GETFL, 0); - - if (set) - flags &= ~O_NONBLOCK; - else - flags |= O_NONBLOCK; - fcntl(fd, F_SETFL, flags); -} - -static void signal_producer(int fd) -{ - char cmd; - - cmd = 'S'; - write(fd, &cmd, sizeof(cmd)); -} - -static void wait_for_signal(int fd) -{ - char buf[5]; - - read(fd, buf, 5); -} - -static void die(int status) -{ - fflush(NULL); - unlink(sock_name); - kill(producer_id, SIGTERM); - exit(status); -} - -int is_sioctatmark(int fd) -{ - int ans = -1; - - if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { -#ifdef DEBUG - perror("SIOCATMARK Failed"); -#endif - } - return ans; -} - -void read_oob(int fd, char *c) -{ - - *c = ' '; - if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { -#ifdef DEBUG - perror("Reading MSG_OOB Failed"); -#endif - } -} - -int read_data(int pfd, char *buf, int size) -{ - int len = 0; - - memset(buf, size, '0'); - len = read(pfd, buf, size); -#ifdef DEBUG - if (len < 0) - perror("read failed"); -#endif - return len; -} - -static void wait_for_data(int pfd, int event) -{ - struct pollfd pfds[1]; - - pfds[0].fd = pfd; - pfds[0].events = event; - poll(pfds, 1, -1); -} - -void producer(struct sockaddr_un *consumer_addr) -{ - int cfd; - char buf[64]; - int i; - - memset(buf, 'x', sizeof(buf)); - cfd = socket(AF_UNIX, SOCK_STREAM, 0); - - wait_for_signal(pipefd[0]); - if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(*consumer_addr)) != 0) { - perror("Connect failed"); - kill(0, SIGTERM); - exit(1); - } - - for (i = 0; i < 2; i++) { - /* Test 1: Test for SIGURG and OOB */ - wait_for_signal(pipefd[0]); - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 2: Test for OOB being overwitten */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '#'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 3: Test for SIOCATMARK */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - send(cfd, buf, sizeof(buf), 0); - - wait_for_signal(pipefd[0]); - - /* Test 4: Test for 1byte OOB msg */ - memset(buf, 'x', sizeof(buf)); - buf[0] = '@'; - send(cfd, buf, 1, MSG_OOB); - } -} - -int -main(int argc, char **argv) -{ - int lfd, pfd; - struct sockaddr_un consumer_addr, paddr; - socklen_t len = sizeof(consumer_addr); - char buf[1024]; - int on = 0; - char oob; - int atmark; - - lfd = socket(AF_UNIX, SOCK_STREAM, 0); - memset(&consumer_addr, 0, sizeof(consumer_addr)); - consumer_addr.sun_family = AF_UNIX; - sprintf(sock_name, "unix_oob_%d", getpid()); - unlink(sock_name); - strcpy(consumer_addr.sun_path, sock_name); - - if ((bind(lfd, (struct sockaddr *)&consumer_addr, - sizeof(consumer_addr))) != 0) { - perror("socket bind failed"); - exit(1); - } - - pipe(pipefd); - - listen(lfd, 1); - - producer_id = fork(); - if (producer_id == 0) { - producer(&consumer_addr); - exit(0); - } - - set_sig_handler(SIGURG); - signal_producer(pipefd[1]); - - pfd = accept(lfd, (struct sockaddr *) &paddr, &len); - fcntl(pfd, F_SETOWN, getpid()); - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1: - * veriyf that SIGURG is - * delivered, 63 bytes are - * read, oob is '@', and POLLPRI works. - */ - wait_for_data(pfd, POLLPRI); - read_oob(pfd, &oob); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 63 || oob != '@') { - fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2: - * Verify that the first OOB is over written by - * the 2nd one and the first OOB is returned as - * part of the read, and sigurg is received. - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = 0; - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - read_oob(pfd, &oob); - if (!signal_recvd || len != 127 || oob != '#') { - fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and atmark - * is set. - * oob is '%' and second read returns - * 64 bytes. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 150) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - read_oob(pfd, &oob); - - if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, - "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", - signal_recvd, len, oob, atmark); - die(1); - } - - signal_recvd = 0; - - len = read_data(pfd, buf, 1024); - if (len != 64) { - fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4: - * verify that a single byte - * oob message is delivered. - * set non blocking mode and - * check proper error is - * returned and sigurg is - * received and correct - * oob is read. - */ - - set_filemode(pfd, 0); - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if ((len == -1) && (errno == 11)) - len = 0; - - read_oob(pfd, &oob); - - if (!signal_recvd || len != 0 || oob != '@') { - fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - set_filemode(pfd, 1); - - /* Inline Testing */ - - on = 1; - if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { - perror("SO_OOBINLINE"); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1 -- Inline: - * Check that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' - */ - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - - if (!signal_recvd || len != 63) { - fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", - signal_recvd, len); - die(1); - } - - len = read_data(pfd, buf, 1024); - - if (len != 1) { - fprintf(stderr, - "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2 -- Inline: - * Verify that the first OOB is over written by - * the 2nd one and read breaks correctly on - * 2nd OOB boundary with the first OOB returned as - * part of the read, and sigurg is delivered and - * siocatmark returns true. - * next read returns one byte, the oob byte - * and siocatmark returns false. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 127 || atmark != 1 || !signal_recvd) { - fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", - len, atmark); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 1 || buf[0] != '#' || atmark == 1) { - fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3 -- Inline: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and siocatmark - * is true after the read. - * subsequent read returns 65 bytes - * because of oob which should be '%'. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 126) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (!signal_recvd || len != 127 || !atmark) { - fprintf(stderr, - "Test 3 Inline failed, sigurg %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 65 || buf[0] != '%' || atmark != 0) { - fprintf(stderr, - "Test 3.1 Inline failed, len %d oob %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4 -- Inline: - * verify that a single - * byte oob message is delivered - * and read returns one byte, the oob - * byte and sigurg is received - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 1 || buf[0] != '@') { - fprintf(stderr, - "Test 4 Inline failed, signal %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - die(0); -} diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index 7e7ed6c558da..d458b45c775b 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Author: Taehee Yoo <ap420073@gmail.com> diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 04de7a6ba6f3..5b9baf708950 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -26,7 +26,6 @@ CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m @@ -75,7 +74,12 @@ CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y CONFIG_NET_SCH_PRIO=m CONFIG_NFT_COMPAT=m +CONFIG_NF_CONNTRACK_OVS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_OPENVSWITCH=m +CONFIG_OPENVSWITCH_GENEVE=m +CONFIG_OPENVSWITCH_GRE=m +CONFIG_OPENVSWITCH_VXLAN=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m @@ -101,3 +105,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 73895711cdf4..5f3c28fc8624 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1737,53 +1737,53 @@ ipv4_rt_dsfield() # DSCP 0x10 should match the specific route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:CE" # Unknown DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE" # Null DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:CE" } diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index fa7b59ff4029..224346426ef2 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -39,6 +39,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ ipip_hier_gre.sh \ lib_sh_test.sh \ local_termination.sh \ + min_max_mtu.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ @@ -70,6 +71,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ router_broadcast.sh \ router_mpath_nh_res.sh \ router_mpath_nh.sh \ + router_mpath_seed.sh \ router_multicast.sh \ router_multipath.sh \ router_nh.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh index 0760a34b7114..a21b7085da2e 100755 --- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh @@ -178,6 +178,22 @@ fdb_del() check_err $? "Failed to remove a FDB entry of type ${type}" } +check_fdb_n_learned_support() +{ + if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then + echo "SKIP: iproute2 too old, missing bridge max learned support" + exit $ksft_skip + fi + + ip link add dev br0 type bridge + local learned=$(fdb_get_n_learned) + ip link del dev br0 + if [ "$learned" == "null" ]; then + echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported." + exit $ksft_skip + fi +} + check_accounting_one_type() { local type=$1 is_counted=$2 overrides_learned=$3 @@ -274,6 +290,8 @@ check_limit() done } +check_fdb_n_learned_support + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index f1de525cfa55..62a05bca1e82 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -122,6 +122,8 @@ devlink_reload() still_pending=$(devlink resource show "$DEVLINK_DEV" | \ grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" + + udevadm settle } declare -A DEVLINK_ORIG diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index eabbdf00d8ca..ff96bb7535ff 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1134,12 +1134,19 @@ bridge_ageing_time_get() } declare -A SYSCTL_ORIG +sysctl_save() +{ + local key=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) +} + sysctl_set() { local key=$1; shift local value=$1; shift - SYSCTL_ORIG[$key]=$(sysctl -n $key) + sysctl_save "$key" sysctl -qw $key="$value" } @@ -1218,22 +1225,6 @@ trap_uninstall() tc filter del dev $dev $direction pref 1 flower } -slow_path_trap_install() -{ - # For slow-path testing, we need to install a trap to get to - # slow path the packets that would otherwise be switched in HW. - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_install "$@" - fi -} - -slow_path_trap_uninstall() -{ - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_uninstall "$@" - fi -} - __icmp_capture_add_del() { local add_del=$1; shift @@ -1250,22 +1241,34 @@ __icmp_capture_add_del() icmp_capture_install() { - __icmp_capture_add_del add 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 "" "$tundev" "$filter" } icmp_capture_uninstall() { - __icmp_capture_add_del del 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 "" "$tundev" "$filter" } icmp6_capture_install() { - __icmp_capture_add_del add 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 v6 "$tundev" "$filter" } icmp6_capture_uninstall() { - __icmp_capture_add_del del 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 v6 "$tundev" "$filter" } __vlan_capture_add_del() @@ -1283,12 +1286,18 @@ __vlan_capture_add_del() vlan_capture_install() { - __vlan_capture_add_del add 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del add 100 "$dev" "$filter" } vlan_capture_uninstall() { - __vlan_capture_add_del del 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del del 100 "$dev" "$filter" } __dscp_capture_add_del() @@ -1648,34 +1657,61 @@ __start_traffic() local sip=$1; shift local dip=$1; shift local dmac=$1; shift + local -a mz_args=("$@") $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ - -a own -b $dmac -t "$proto" -q "$@" & + -a own -b $dmac -t "$proto" -q "${mz_args[@]}" & sleep 1 } start_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize udp "$@" + __start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize tcp "$@" + __start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_traffic() { - start_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic() { - start_tcp_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } stop_traffic() diff --git a/tools/testing/selftests/net/forwarding/min_max_mtu.sh b/tools/testing/selftests/net/forwarding/min_max_mtu.sh new file mode 100755 index 000000000000..97bb8b221bed --- /dev/null +++ b/tools/testing/selftests/net/forwarding/min_max_mtu.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 | +# | | +# | $h1.10 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# | | | +# | $h1 + | +# | | | +# +------------------|-+ +# | +# +------------------|-+ +# | SW | | +# | $swp1 + | +# | | | +# | $swp1.10 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + max_mtu_config_test + max_mtu_traffic_test + min_mtu_config_test + min_mtu_traffic_test +" + +NUM_NETIFS=2 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.2/24 2001:db8:1::2/64 +} + +h1_destroy() +{ + vlan_destroy $h1 10 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $h1 +} + +switch_create() +{ + ip li set dev $swp1 up + vlan_create $swp1 10 "" 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_destroy() +{ + ip li set dev $swp1 down + vlan_destroy $swp1 10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + + switch_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.1 +} + +ping_ipv6() +{ + ping6_test $h1.10 2001:db8:1::1 +} + +min_max_mtu_get_if() +{ + local dev=$1; shift + local min_max=$1; shift + + ip -d -j link show $dev | jq ".[].$min_max" +} + +ensure_compatible_min_max_mtu() +{ + local min_max=$1; shift + + local mtu=$(min_max_mtu_get_if ${NETIFS[p1]} $min_max) + local i + + for ((i = 2; i <= NUM_NETIFS; ++i)); do + local current_mtu=$(min_max_mtu_get_if ${NETIFS[p$i]} $min_max) + + if [ $current_mtu -ne $mtu ]; then + return 1 + fi + done +} + +mtu_set_if() +{ + local dev=$1; shift + local mtu=$1; shift + local should_fail=${1:-0}; shift + + mtu_set $dev $mtu 2>/dev/null + check_err_fail $should_fail $? "Set MTU $mtu for $dev" +} + +mtu_set_all_if() +{ + local mtu=$1; shift + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_set_if ${NETIFS[p$i]} $mtu + mtu_set_if ${NETIFS[p$i]}.10 $mtu + done +} + +mtu_restore_all_if() +{ + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_restore ${NETIFS[p$i]}.10 + mtu_restore ${NETIFS[p$i]} + done +} + +mtu_test_ping4() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 20 bytes for IP header + local ping_headers_len=$((20 + 8)) + local pkt_size=$((mtu - ping_headers_len)) + + ping_do $h1.10 192.0.2.1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping, packet size: $pkt_size" +} + +mtu_test_ping6() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 40 bytes for IPv6 header + local ping6_headers_len=$((40 + 8)) + local pkt_size=$((mtu - ping6_headers_len)) + + ping6_do $h1.10 2001:db8:1::1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping6, packet size: $pkt_size" +} + +max_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local max_mtu=$(min_max_mtu_get_if $dev "max_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $max_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((max_mtu + 1)) $should_fail + mtu_restore $dev + done + + log_test "Test maximum MTU configuration" +} + +max_mtu_traffic_test() +{ + local should_fail + local max_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "max_mtu"; then + log_test_xfail "Topology has incompatible maximum MTU values" + return + fi + + max_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "max_mtu") + + should_fail=0 + mtu_set_all_if $max_mtu + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + should_fail=1 + mtu_set_all_if $((max_mtu - 1)) + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + log_test "Test traffic, packet size is maximum MTU" +} + +min_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local min_mtu=$(min_max_mtu_get_if $dev "min_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $min_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((min_mtu - 1)) $should_fail + mtu_restore $dev + done + + log_test "Test minimum MTU configuration" +} + +min_mtu_traffic_test() +{ + local should_fail=0 + local min_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "min_mtu"; then + log_test_xfail "Topology has incompatible minimum MTU values" + return + fi + + min_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "min_mtu") + mtu_set_all_if $min_mtu + mtu_test_ping4 $min_mtu $should_fail + # Do not test minimum MTU with IPv6, as IPv6 requires higher MTU. + + mtu_restore_all_if + + log_test "Test traffic, packet size is minimum MTU" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index 0266443601bc..921c733ee04f 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -74,7 +74,7 @@ test_span_gre_mac() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 @@ -82,29 +82,29 @@ test_span_gre_mac() icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction - log_test "$direction $what: envelope MAC ($tcflags)" + log_test "$direction $what: envelope MAC" } test_two_spans() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" - mirror_install $swp1 egress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + mirror_install $swp1 ingress gt4 "matchall" + mirror_install $swp1 egress gt6 "matchall" + quick_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - fail_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + fail_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" mirror_uninstall $swp1 egress - quick_test_span_gre_dir gt4 ingress - fail_test_span_gre_dir gt6 egress + quick_test_span_gre_dir gt4 8 0 + fail_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - log_test "two simultaneously configured mirrors ($tcflags)" + log_test "two simultaneously configured mirrors" } test_gretap() @@ -131,30 +131,11 @@ test_ip6gretap_mac() test_span_gre_mac gt6 egress "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh index 6c257ec03756..e3cd48e18eeb 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -196,32 +196,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" } -test_all() -{ - RET=0 - - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index 04fd14b0a9b7..6c7bd33332c2 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -108,30 +108,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index f35313c76fac..909ec956a5e5 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -104,30 +104,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index 0cf4c47a46f9..40ac9dd3aff1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -104,30 +104,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -tests() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -tests - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - tests -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index c53148b1dc63..fe4d7c906a70 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -227,10 +227,10 @@ test_lag_slave() RET=0 tc filter add dev $swp1 ingress pref 999 \ - proto 802.1q flower vlan_ethtype arp $tcflags \ + proto 802.1q flower vlan_ethtype arp \ action pass mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" # Test connectivity through $up_dev when $down_dev is set down. ip link set dev $down_dev down @@ -239,7 +239,7 @@ test_lag_slave() setup_wait_dev $host_dev $ARPING -I br1 192.0.2.130 -qfc 1 sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" # Test lack of connectivity when both slaves are down. ip link set dev $up_dev down @@ -252,7 +252,7 @@ test_lag_slave() mirror_uninstall $swp1 ingress tc filter del dev $swp1 ingress pref 999 - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -265,30 +265,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 5ea9d63915f7..65ae9d960c18 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -73,7 +73,7 @@ test_span_gre_ttl() RET=0 mirror_install $swp1 ingress $tundev \ - "prot ip flower $tcflags ip_prot icmp" + "prot ip flower ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ flower skip_hw ip_ttl 50 action pass @@ -81,13 +81,13 @@ test_span_gre_ttl() ip link set dev $tundev type $type ttl 50 sleep 2 - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 ">= 10" ip link set dev $tundev type $type ttl 100 tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 ingress - log_test "$what: TTL change ($tcflags)" + log_test "$what: TTL change" } test_span_gre_tun_up() @@ -98,15 +98,15 @@ test_span_gre_tun_up() RET=0 ip link set dev $tundev down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev up - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel down/up ($tcflags)" + log_test "$what: tunnel down/up" } test_span_gre_egress_up() @@ -118,8 +118,8 @@ test_span_gre_egress_up() RET=0 ip link set dev $swp3 down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev # After setting the device up, wait for neighbor to get resolved so that # we can expect mirroring to work. @@ -127,10 +127,10 @@ test_span_gre_egress_up() setup_wait_dev $swp3 ping -c 1 -I $swp3 $remote_ip &>/dev/null - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: egress down/up ($tcflags)" + log_test "$what: egress down/up" } test_span_gre_remote_ip() @@ -144,14 +144,14 @@ test_span_gre_remote_ip() RET=0 ip link set dev $tundev type $type remote $wrong_ip - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type remote $correct_ip - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: remote address change ($tcflags)" + log_test "$what: remote address change" } test_span_gre_tun_del() @@ -165,10 +165,10 @@ test_span_gre_tun_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip link del dev $tundev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev tunnel_create $tundev $type $local_ip $remote_ip \ ttl 100 tos inherit $flags @@ -176,11 +176,11 @@ test_span_gre_tun_del() # Recreating the tunnel doesn't reestablish mirroring, so reinstall it # and verify it works for the follow-up tests. mirror_uninstall $swp1 ingress - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel deleted ($tcflags)" + log_test "$what: tunnel deleted" } test_span_gre_route_del() @@ -192,18 +192,18 @@ test_span_gre_route_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip route del $route dev $edev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev ip route add $route dev $edev - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: underlay route removal ($tcflags)" + log_test "$what: underlay route removal" } test_ttl() @@ -244,30 +244,11 @@ test_route_del() test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh index 09389f3b9369..3a84f3ab5856 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -64,12 +64,19 @@ cleanup() test_span_gre_dir_acl() { - test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_gre_dir_ips "$tundev" "$forward_type" \ + "$backward_type" 192.0.2.3 192.0.2.4 } fail_test_span_gre_dir_acl() { - fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + + fail_test_span_gre_dir_ips "$tundev" 192.0.2.3 192.0.2.4 } full_test_span_gre_dir_acl() @@ -84,16 +91,15 @@ full_test_span_gre_dir_acl() RET=0 mirror_install $swp1 $direction $tundev \ - "protocol ip flower $tcflags dst_ip $match_dip" - fail_test_span_gre_dir $tundev $direction - test_span_gre_dir_acl "$tundev" "$direction" \ - "$forward_type" "$backward_type" + "protocol ip flower dst_ip $match_dip" + fail_test_span_gre_dir $tundev + test_span_gre_dir_acl "$tundev" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction # Test lack of mirroring after ACL mirror is uninstalled. - fail_test_span_gre_dir_acl "$tundev" "$direction" + fail_test_span_gre_dir_acl "$tundev" - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } test_gretap() @@ -108,30 +114,11 @@ test_ip6gretap() full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 9edf4cb104a8..1261e6f46e34 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -37,8 +37,14 @@ # | \ / | # | \____________________________________________/ | # | | | -# | + lag2 (team) | -# | 192.0.2.130/28 | +# | + lag2 (team) ------> + gt4-dst (gretap) | +# | 192.0.2.130/28 loc=192.0.2.130 | +# | rem=192.0.2.129 | +# | ttl=100 | +# | tos=inherit | +# | | +# | | +# | | # | | # +---------------------------------------------------------------------------+ @@ -50,9 +56,6 @@ ALL_TESTS=" NUM_NETIFS=6 source lib.sh source mirror_lib.sh -source mirror_gre_lib.sh - -require_command $ARPING vlan_host_create() { @@ -122,16 +125,21 @@ h3_create() { vrf_create vrf-h3 ip link set dev vrf-h3 up - tc qdisc add dev $h3 clsact - tc qdisc add dev $h4 clsact h3_create_team + + tunnel_create gt4-dst gretap 192.0.2.130 192.0.2.129 \ + ttl 100 tos inherit + ip link set dev gt4-dst master vrf-h3 + tc qdisc add dev gt4-dst clsact } h3_destroy() { + tc qdisc del dev gt4-dst clsact + ip link set dev gt4-dst nomaster + tunnel_destroy gt4-dst + h3_destroy_team - tc qdisc del dev $h4 clsact - tc qdisc del dev $h3 clsact ip link set dev vrf-h3 down vrf_destroy vrf-h3 } @@ -188,18 +196,12 @@ setup_prepare() h2_create h3_create switch_create - - trap_install $h3 ingress - trap_install $h4 ingress } cleanup() { pre_cleanup - trap_uninstall $h4 ingress - trap_uninstall $h3 ingress - switch_destroy h3_destroy h2_destroy @@ -218,7 +220,8 @@ test_lag_slave() RET=0 mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" + vlan_capture_install gt4-dst "vlan_ethtype ipv4 ip_proto icmp type 8" # Move $down_dev away from the team. That will prompt change in # txability of the connected device, without changing its upness. The @@ -226,13 +229,14 @@ test_lag_slave() # other slave. ip link set dev $down_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 10 # Test lack of connectivity when neither slave is txable. ip link set dev $up_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 0 + + vlan_capture_uninstall gt4-dst mirror_uninstall $swp1 ingress # Recreate H3's team device, because mlxsw, which this test is @@ -243,7 +247,7 @@ test_lag_slave() # Wait for ${h,swp}{3,4}. setup_wait - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -256,30 +260,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index 0c36546e131e..20078cc55f24 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -5,22 +5,34 @@ source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift - do_test_span_dir_ips 10 h3-$tundev "$@" + do_test_span_dir_ips 10 h3-$tundev "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift - do_test_span_dir_ips 0 h3-$tundev "$@" + do_test_span_dir_ips 0 h3-$tundev "$ip1" "$ip2" } test_span_gre_dir_ips() { local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift - test_span_dir_ips h3-$tundev "$@" + test_span_dir_ips h3-$tundev "$forward_type" \ + "$backward_type" "$ip1" "$ip2" } full_test_span_gre_dir_ips() @@ -35,12 +47,12 @@ full_test_span_gre_dir_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + mirror_install $swp1 $direction $tundev "matchall" + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } full_test_span_gre_dir_vlan_ips() @@ -56,45 +68,63 @@ full_test_span_gre_dir_vlan_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" tc filter add dev $h3 ingress pref 77 prot 802.1q \ flower $vlan_match \ action pass - mirror_test v$h1 $ip1 $ip2 $h3 77 10 + mirror_test v$h1 $ip1 $ip2 $h3 77 '>= 10' tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } quick_test_span_gre_dir() { - quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + quick_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir() { - fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 -} + local tundev=$1; shift -test_span_gre_dir() -{ - test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + fail_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir() { - full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \ + "$backward_type" "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir_vlan() { - full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local vlan_match=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan_ips "$tundev" "$direction" "$vlan_match" \ + "$forward_type" "$backward_type" \ + "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_stp_ips() @@ -104,27 +134,39 @@ full_test_span_gre_stp_ips() local what=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local h3mac=$(mac_get $h3) RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" bridge link set dev $nbpdev state disabled sleep 1 - fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + fail_test_span_gre_dir_ips $tundev $ip1 $ip2 bridge link set dev $nbpdev state forwarding sleep 1 - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" mirror_uninstall $swp1 ingress - log_test "$what: STP state ($tcflags)" + log_test "$what: STP state" } full_test_span_gre_stp() { - full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + full_test_span_gre_stp_ips "$tundev" "$nbpdev" "$what" \ + 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh index fc0508e40fca..2cbfbecf25c8 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -60,41 +60,32 @@ test_span_gre_neigh() local addr=$1; shift local tundev=$1; shift local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local what=$1; shift RET=0 ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 $direction $tundev "matchall" + fail_test_span_gre_dir $tundev "$forward_type" "$backward_type" ip neigh del dev $swp3 $addr - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction $what: neighbor change ($tcflags)" + log_test "$direction $what: neighbor change" } test_gretap() { - test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" - test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 ingress 8 0 "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { - test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" - test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_gre_neigh 2001:db8:2::2 gt6 ingress 8 0 "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress 0 8 "mirror to ip6gretap" } trap cleanup EXIT @@ -102,14 +93,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh index 6f9ef1820e93..34bc646938e3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -75,42 +75,31 @@ cleanup() test_gretap() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" # For IPv4, test that there's no mirroring without the route directing # the traffic to tunnel remote address. Then add it and test that # mirroring starts. For IPv6 we can't test this due to the limitation # that routes for locally-specified IPv6 addresses can't be added. - fail_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt4 ip route add 192.0.2.130/32 via 192.0.2.162 - quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt4 ip route del 192.0.2.130/32 via 192.0.2.162 mirror_uninstall $swp1 ingress - log_test "mirror to gre with next-hop remote ($tcflags)" + log_test "mirror to gre with next-hop remote" } test_ip6gretap() { RET=0 - mirror_install $swp1 ingress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt6 ingress + mirror_install $swp1 ingress gt6 "matchall" + quick_test_span_gre_dir gt6 mirror_uninstall $swp1 ingress - log_test "mirror to ip6gre with next-hop remote ($tcflags)" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + log_test "mirror to ip6gre with next-hop remote" } trap cleanup EXIT @@ -118,14 +107,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh index 88cecdb9a861..63689928cb51 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -63,30 +63,11 @@ test_gretap() full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c8a9b5bd841f..1b902cc579f6 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -153,21 +153,21 @@ test_span_gre_forbidden_cpu() RET=0 # Run the pass-test first, to prime neighbor table. - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev # Now forbid the VLAN at the bridge and see it fail. bridge vlan del dev br1 vid 555 self sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev br1 vid 555 self sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge ($tcflags)" + log_test "$what: vlan forbidden at a bridge" } test_gretap_forbidden_cpu() @@ -187,22 +187,22 @@ test_span_gre_forbidden_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev bridge vlan del dev $swp3 vid 555 sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev $swp3 vid 555 # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge egress ($tcflags)" + log_test "$what: vlan forbidden at a bridge egress" } test_gretap_forbidden_egress() @@ -223,30 +223,30 @@ test_span_gre_untagged_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev "matchall" - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged h3_addr_add_del add $h3 sleep 5 - quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + fail_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 h3_addr_add_del add $h3.555 sleep 5 - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" mirror_uninstall $swp1 ingress - log_test "$what: vlan untagged at a bridge egress ($tcflags)" + log_test "$what: vlan untagged at a bridge egress" } test_gretap_untagged_egress() @@ -267,19 +267,19 @@ test_span_gre_fdb_roaming() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev while ((RET == 0)); do bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null bridge fdb add dev $swp2 $h3mac vlan 555 master static sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev if ! bridge fdb sh dev $swp2 vlan 555 master \ | grep -q $h3mac; then printf "TEST: %-60s [RETRY]\n" \ - "$what: MAC roaming ($tcflags)" + "$what: MAC roaming" # ARP or ND probably reprimed the FDB while the test # was running. We would get a spurious failure. RET=0 @@ -292,11 +292,11 @@ test_span_gre_fdb_roaming() # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: MAC roaming ($tcflags)" + log_test "$what: MAC roaming" } test_gretap_fdb_roaming() @@ -319,30 +319,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 3e8ebeff3019..6bf9d5ae933c 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -44,14 +44,17 @@ mirror_test() local type="icmp echoreq" fi + if [[ -z ${expect//[[:digit:]]/} ]]; then + expect="== $expect" + fi + local t0=$(tc_rule_stats_get $dev $pref) $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((delta $expect)) check_err $? "Expected to capture $expect packets, got $delta." } @@ -59,36 +62,42 @@ do_test_span_dir_ips() { local expect=$1; shift local dev=$1; shift - local direction=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift - icmp_capture_install $dev + icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" mirror_test v$h2 $ip2 $ip1 $dev 100 $expect icmp_capture_uninstall $dev } quick_test_span_dir_ips() { - do_test_span_dir_ips 10 "$@" -} + local dev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift -fail_test_span_dir_ips() -{ - do_test_span_dir_ips 0 "$@" + do_test_span_dir_ips 10 "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } test_span_dir_ips() { local dev=$1; shift - local direction=$1; shift local forward_type=$1; shift local backward_type=$1; shift local ip1=$1; shift local ip2=$1; shift - quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + quick_test_span_dir_ips "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 10 @@ -99,14 +108,14 @@ test_span_dir_ips() icmp_capture_uninstall $dev } -fail_test_span_dir() -{ - fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 -} - test_span_dir() { - test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_dir_ips "$dev" "$forward_type" "$backward_type" \ + 192.0.2.1 192.0.2.2 } do_test_span_vlan_dir_ips() @@ -114,7 +123,6 @@ do_test_span_vlan_dir_ips() local expect=$1; shift local dev=$1; shift local vid=$1; shift - local direction=$1; shift local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift @@ -123,27 +131,50 @@ do_test_span_vlan_dir_ips() # The traffic is meant for local box anyway, so will be trapped to # kernel. vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" - mirror_test v$h1 $ip1 $ip2 $dev 100 $expect - mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + mirror_test v$h1 $ip1 $ip2 $dev 100 "$expect" + mirror_test v$h2 $ip2 $ip1 $dev 100 "$expect" vlan_capture_uninstall $dev } quick_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 10 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips '>= 10' "$dev" "$vid" "$ul_proto" \ + "$ip1" "$ip2" } fail_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 0 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips 0 "$dev" "$vid" "$ul_proto" "$ip1" "$ip2" } quick_test_span_vlan_dir() { - quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + quick_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } fail_test_span_vlan_dir() { - fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + fail_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 0b44e148235e..2f150a414d38 100755 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -40,12 +40,16 @@ setup_prepare() vlan_create $h2 111 v$h2 192.0.2.18/28 bridge vlan add dev $swp2 vid 111 + + trap_install $h3 ingress } cleanup() { pre_cleanup + trap_uninstall $h3 ingress + vlan_destroy $h2 111 vlan_destroy $h1 111 vlan_destroy $h3 555 @@ -63,11 +67,11 @@ test_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_install $swp1 $direction $swp3.555 "matchall" + test_span_dir "$h3.555" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction mirror to vlan ($tcflags)" + log_test "$direction mirror to vlan" } test_vlan() @@ -84,14 +88,12 @@ test_tagged_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ - 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ - 192.0.2.17 192.0.2.18 + mirror_install $swp1 $direction $swp3.555 "matchall" + do_test_span_vlan_dir_ips '>= 10' "$h3.555" 111 ip 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 ip 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction - log_test "$direction mirror tagged to vlan ($tcflags)" + log_test "$direction mirror tagged to vlan" } test_tagged_vlan() @@ -100,32 +102,11 @@ test_tagged_vlan() test_tagged_vlan_dir egress 0 8 } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - trap_install $h3 ingress - - tests_run - - trap_uninstall $h3 ingress - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_mpath_seed.sh b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh new file mode 100755 index 000000000000..314cb906c1eb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh @@ -0,0 +1,333 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-------------------------+ +-------------------------+ +# | H1 | | H2 | +# | $h1 + | | + $h2 | +# | 192.0.2.1/28 | | | | 192.0.2.34/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:3::2/64 | +# +-------------------|-----+ +-|-----------------------+ +# | | +# +-------------------|-----+ +-|-----------------------+ +# | R1 | | | | R2 | +# | $rp11 + | | + $rp21 | +# | 192.0.2.2/28 | | 192.0.2.33/28 | +# | 2001:db8:1::2/64 | | 2001:db8:3::1/64 | +# | | | | +# | $rp12 + | | + $rp22 | +# | 192.0.2.17/28 | | | | 192.0.2.18..27/28 | +# | 2001:db8:2::17/64 | | | | 2001:db8:2::18..27/64 | +# +-------------------|-----+ +-|-----------------------+ +# | | +# `----------' + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_mpath_seed_stability_ipv4 + test_mpath_seed_stability_ipv6 + test_mpath_seed_get + test_mpath_seed_ipv4 + test_mpath_seed_ipv6 +" +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.34/28 2001:db8:3::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1 + ip -4 route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33 + simple_if_fini $h2 192.0.2.34/28 2001:db8:3::2/64 +} + +router1_create() +{ + simple_if_init $rp11 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $rp12 v$rp11 192.0.2.17/28 2001:db8:2::17/64 +} + +router1_destroy() +{ + __simple_if_fini $rp12 192.0.2.17/28 2001:db8:2::17/64 + simple_if_fini $rp11 192.0.2.2/28 2001:db8:1::2/64 +} + +router2_create() +{ + simple_if_init $rp21 192.0.2.33/28 2001:db8:3::1/64 + __simple_if_init $rp22 v$rp21 192.0.2.18/28 2001:db8:2::18/64 + ip -4 route add 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17 +} + +router2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17 + ip -4 route del 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17 + __simple_if_fini $rp22 192.0.2.18/28 2001:db8:2::18/64 + simple_if_fini $rp21 192.0.2.33/28 2001:db8:3::1/64 +} + +nexthops_create() +{ + local i + for i in $(seq 10); do + ip nexthop add id $((1000 + i)) via 192.0.2.18 dev $rp12 + ip nexthop add id $((2000 + i)) via 2001:db8:2::18 dev $rp12 + done + + ip nexthop add id 1000 group $(seq -s / 1001 1010) hw_stats on + ip nexthop add id 2000 group $(seq -s / 2001 2010) hw_stats on + ip -4 route add 192.0.2.32/28 vrf v$rp11 nhid 1000 + ip -6 route add 2001:db8:3::/64 vrf v$rp11 nhid 2000 +} + +nexthops_destroy() +{ + local i + + ip -6 route del 2001:db8:3::/64 vrf v$rp11 nhid 2000 + ip -4 route del 192.0.2.32/28 vrf v$rp11 nhid 1000 + ip nexthop del id 2000 + ip nexthop del id 1000 + + for i in $(seq 10 -1 1); do + ip nexthop del id $((2000 + i)) + ip nexthop del id $((1000 + i)) + done +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp21=${NETIFS[p5]} + h2=${NETIFS[p6]} + + sysctl_save net.ipv4.fib_multipath_hash_seed + + vrf_prepare + + h1_create + h2_create + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + nexthops_destroy + router2_destroy + router1_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + sysctl_restore net.ipv4.fib_multipath_hash_seed +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:3::2 +} + +test_mpath_seed_get() +{ + RET=0 + + local i + for ((i = 0; i < 100; i++)); do + local seed_w=$((999331 * i)) + sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed_w + local seed_r=$(sysctl -n net.ipv4.fib_multipath_hash_seed) + ((seed_r == seed_w)) + check_err $? "mpath seed written as $seed_w, but read as $seed_r" + done + + log_test "mpath seed set/get" +} + +nh_stats_snapshot() +{ + local group_id=$1; shift + + ip -j -s -s nexthop show id $group_id | + jq -c '[.[].group_stats | sort_by(.id) | .[].packets]' +} + +get_active_nh() +{ + local s0=$1; shift + local s1=$1; shift + + jq -n --argjson s0 "$s0" --argjson s1 "$s1" -f /dev/stdin <<-"EOF" + [range($s0 | length)] | + map($s1[.] - $s0[.]) | + map(if . > 8 then 1 else 0 end) | + index(1) + EOF +} + +probe_nh() +{ + local group_id=$1; shift + local -a mz=("$@") + + local s0=$(nh_stats_snapshot $group_id) + "${mz[@]}" + local s1=$(nh_stats_snapshot $group_id) + + get_active_nh "$s0" "$s1" +} + +probe_seed() +{ + local group_id=$1; shift + local seed=$1; shift + local -a mz=("$@") + + sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed + probe_nh "$group_id" "${mz[@]}" +} + +test_mpath_seed() +{ + local group_id=$1; shift + local what=$1; shift + local -a mz=("$@") + local ii + + RET=0 + + local -a tally=(0 0 0 0 0 0 0 0 0 0) + for ((ii = 0; ii < 100; ii++)); do + local act=$(probe_seed $group_id $((999331 * ii)) "${mz[@]}") + ((tally[act]++)) + done + + local tally_str="${tally[@]}" + for ((ii = 0; ii < ${#tally[@]}; ii++)); do + ((tally[ii] > 0)) + check_err $? "NH #$ii not hit, tally='$tally_str'" + done + + log_test "mpath seed $what" + sysctl -qw net.ipv4.fib_multipath_hash_seed=0 +} + +test_mpath_seed_ipv4() +{ + test_mpath_seed 1000 IPv4 \ + $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \ + -p 64 -d 0 -c 10 -t udp +} + +test_mpath_seed_ipv6() +{ + test_mpath_seed 2000 IPv6 \ + $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \ + -p 64 -d 0 -c 10 -t udp +} + +check_mpath_seed_stability() +{ + local seed=$1; shift + local act_0=$1; shift + local act_1=$1; shift + + ((act_0 == act_1)) + check_err $? "seed $seed: active NH moved from $act_0 to $act_1 after seed change" +} + +test_mpath_seed_stability() +{ + local group_id=$1; shift + local what=$1; shift + local -a mz=("$@") + + RET=0 + + local seed_0=0 + local seed_1=3221338814 + local seed_2=3735928559 + + # Initial active NH before touching the seed at all. + local act_ini=$(probe_nh $group_id "${mz[@]}") + + local act_0_0=$(probe_seed $group_id $seed_0 "${mz[@]}") + local act_1_0=$(probe_seed $group_id $seed_1 "${mz[@]}") + local act_2_0=$(probe_seed $group_id $seed_2 "${mz[@]}") + + local act_0_1=$(probe_seed $group_id $seed_0 "${mz[@]}") + local act_1_1=$(probe_seed $group_id $seed_1 "${mz[@]}") + local act_2_1=$(probe_seed $group_id $seed_2 "${mz[@]}") + + check_mpath_seed_stability initial $act_ini $act_0_0 + check_mpath_seed_stability $seed_0 $act_0_0 $act_0_1 + check_mpath_seed_stability $seed_1 $act_1_0 $act_1_1 + check_mpath_seed_stability $seed_2 $act_2_0 $act_2_1 + + log_test "mpath seed stability $what" + sysctl -qw net.ipv4.fib_multipath_hash_seed=0 +} + +test_mpath_seed_stability_ipv4() +{ + test_mpath_seed_stability 1000 IPv4 \ + $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \ + -p 64 -d 0 -c 10 -t udp +} + +test_mpath_seed_stability_ipv6() +{ + test_mpath_seed_stability 2000 IPv6 \ + $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \ + -p 64 -d 0 -c 10 -t udp +} + +trap cleanup EXIT + +setup_prepare +setup_wait +nexthops_create + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 6f0a2e452ba1..3f9d50f1ef9e 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -680,9 +680,9 @@ test_learning() local mac=de:ad:be:ef:13:37 local dst=192.0.2.100 - # Enable learning on the VxLAN device and set ageing time to 10 seconds - ip link set dev br1 type bridge ageing_time 1000 - ip link set dev vx1 type vxlan ageing 10 + # Enable learning on the VxLAN device and set ageing time to 30 seconds + ip link set dev br1 type bridge ageing_time 3000 + ip link set dev vx1 type vxlan ageing 30 ip link set dev vx1 type vxlan learning reapply_config @@ -740,7 +740,7 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 - sleep 20 + sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self check_fail $? diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 3684b813b0f6..f5d207fc770a 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -152,6 +152,15 @@ setup_hsr_interfaces() ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + + ip -net "$ns3" link set address 00:11:22:00:03:01 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:02 dev ns3eth2 + # All Links up ip -net "$ns1" link set ns1eth1 up ip -net "$ns1" link set ns1eth2 up diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh index 1f36785347c0..998103502d5d 100755 --- a/tools/testing/selftests/net/hsr/hsr_redbox.sh +++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh @@ -96,6 +96,21 @@ setup_hsr_interfaces() ip -n "${ns4}" link set ns4eth1 up ip -n "${ns5}" link set ns5eth1 up + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + ip -net "$ns2" link set address 00:11:22:00:02:03 dev ns2eth3 + + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth2 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth3 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3br1 + + ip -net "$ns4" link set address 00:11:22:00:04:01 dev ns4eth1 + ip -net "$ns5" link set address 00:11:22:00:05:01 dev ns5eth1 + ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0 ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0 diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 9155c914c064..d0219032f773 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -125,28 +125,36 @@ slowwait_for_counter() slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +remove_ns_list() +{ + local item=$1 + local ns + local ns_list=("${NS_LIST[@]}") + NS_LIST=() + + for ns in "${ns_list[@]}"; do + if [ "${ns}" != "${item}" ]; then + NS_LIST+=("${ns}") + fi + done +} + cleanup_ns() { local ns="" - local errexit=0 local ret=0 - # disable errexit temporary - if [[ $- =~ "e" ]]; then - errexit=1 - set +e - fi - for ns in "$@"; do [ -z "${ns}" ] && continue - ip netns delete "${ns}" &> /dev/null + ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 + else + remove_ns_list "${ns}" fi done - [ $errexit -eq 1 ] && set -e return $ret } @@ -159,29 +167,30 @@ cleanup_all_ns() # setup_ns local remote setup_ns() { - local ns="" local ns_name="" local ns_list=() - local ns_exist= for ns_name in "$@"; do + # avoid conflicts with local var: internal error + if [ "${ns_name}" = "ns_name" ]; then + echo "Failed to setup namespace '${ns_name}': invalid name" + cleanup_ns "${ns_list[@]}" + exit $ksft_fail + fi + # Some test may setup/remove same netns multi times - if unset ${ns_name} 2> /dev/null; then - ns="${ns_name,,}-$(mktemp -u XXXXXX)" - eval readonly ${ns_name}="$ns" - ns_exist=false + if [ -z "${!ns_name}" ]; then + eval "${ns_name}=${ns_name,,}-$(mktemp -u XXXXXX)" else - eval ns='$'${ns_name} - cleanup_ns "$ns" - ns_exist=true + cleanup_ns "${!ns_name}" fi - if ! ip netns add "$ns"; then + if ! ip netns add "${!ns_name}"; then echo "Failed to create namespace $ns_name" cleanup_ns "${ns_list[@]}" return $ksft_skip fi - ip -n "$ns" link set lo up - ! $ns_exist && ns_list+=("$ns") + ip -n "${!ns_name}" link set lo up + ns_list+=("${!ns_name}") done NS_LIST+=("${ns_list[@]}") } @@ -190,10 +199,10 @@ tc_rule_stats_get() { local dev=$1; shift local pref=$1; shift - local dir=$1; shift + local dir=${1:-ingress}; shift local selector=${1:-.packets}; shift - tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ + tc -j -s filter show dev $dev $dir pref $pref \ | jq ".[1].options.actions[].stats$selector" } diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 4769b4eb1ea1..f26c20df9db4 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -6,6 +6,7 @@ import sys import time import traceback from .consts import KSFT_MAIN_NAME +from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True @@ -57,6 +58,11 @@ def ksft_ge(a, b, comment=""): _fail("Check failed", a, "<", b, comment) +def ksft_lt(a, b, comment=""): + if a >= b: + _fail("Check failed", a, ">=", b, comment) + + class ksft_raises: def __init__(self, expected_type): self.exception = None @@ -103,6 +109,24 @@ def ktap_result(ok, cnt=1, case="", comment=""): print(res) +def ksft_flush_defer(): + global KSFT_RESULT + + i = 0 + qlen_start = len(global_defer_queue) + while global_defer_queue: + i += 1 + entry = global_defer_queue.pop() + try: + entry.exec_only() + except: + ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Defer Exception|", line) + KSFT_RESULT = False + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -122,32 +146,41 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): global KSFT_RESULT cnt = 0 + stop = False for case in cases: KSFT_RESULT = True cnt += 1 + comment = "" + cnt_key = "" + try: case(*args) except KsftSkipEx as e: - ktap_result(True, cnt, case, comment="SKIP " + str(e)) - totals['skip'] += 1 - continue + comment = "SKIP " + str(e) + cnt_key = 'skip' except KsftXfailEx as e: - ktap_result(True, cnt, case, comment="XFAIL " + str(e)) - totals['xfail'] += 1 - continue - except Exception as e: + comment = "XFAIL " + str(e) + cnt_key = 'xfail' + except BaseException as e: + stop |= isinstance(e, KeyboardInterrupt) tb = traceback.format_exc() for line in tb.strip().split('\n'): ksft_pr("Exception|", line) - ktap_result(False, cnt, case) - totals['fail'] += 1 - continue - - ktap_result(KSFT_RESULT, cnt, case) - if KSFT_RESULT: - totals['pass'] += 1 - else: - totals['fail'] += 1 + if stop: + ksft_pr("Stopping tests due to KeyboardInterrupt.") + KSFT_RESULT = False + cnt_key = 'fail' + + ksft_flush_defer() + + if not cnt_key: + cnt_key = 'pass' if KSFT_RESULT else 'fail' + + ktap_result(KSFT_RESULT, cnt, case, comment=comment) + totals[cnt_key] += 1 + + if stop: + break print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 0540ea24921d..72590c3f90f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,12 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 +import errno import json as _json import random import re +import socket import subprocess import time +class CmdExitFailure(Exception): + pass + + class cmd: def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5): if ns: @@ -41,8 +47,8 @@ class cmd: if self.proc.returncode != 0 and fail: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] - raise Exception("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % + (self.proc.args, stdout, stderr)) class bkg(cmd): @@ -60,6 +66,40 @@ class bkg(cmd): return self.process(terminate=self.terminate, fail=self.check_fail) +global_defer_queue = [] + + +class defer: + def __init__(self, func, *args, **kwargs): + global global_defer_queue + + if not callable(func): + raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") + + self.func = func + self.args = args + self.kwargs = kwargs + + self._queue = global_defer_queue + self._queue.append(self) + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.exec() + + def exec_only(self): + self.func(*self.args, **self.kwargs) + + def cancel(self): + self._queue.remove(self) + + def exec(self): + self.cancel() + self.exec_only() + + def tool(name, args, json=None, ns=None, host=None): cmd_str = name + ' ' if json: @@ -77,11 +117,24 @@ def ip(args, json=None, ns=None, host=None): return tool('ip', args, json=json, host=host) +def ethtool(args, json=None, ns=None, host=None): + return tool('ethtool', args, json=json, ns=ns, host=host) + + def rand_port(): """ - Get unprivileged port, for now just random, one day we may decide to check if used. + Get a random unprivileged port, try to make sure it's not already used. """ - return random.randint(10000, 65535) + for _ in range(1000): + port = random.randint(10000, 65535) + try: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + s.bind(("", port)) + return port + except OSError as e: + if e.errno != errno.EADDRINUSE: + raise + raise Exception("Can't find any free unprivileged port") def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2b66c5fa71eb..108aeeb84ef1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2249,9 +2249,10 @@ remove_tests() if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal - pm_nl_set_limits $ns2 3 3 + pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 6ffa9b7a3260..438280e68434 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -1,6 +1,9 @@ #! /bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/../lib.sh" +. "$(dirname "${0}")/../net_helper.sh" + readonly KSFT_PASS=0 readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 @@ -361,20 +364,7 @@ mptcp_lib_check_transfer() { # $1: ns, $2: port mptcp_lib_wait_local_port_listen() { - local listener_ns="${1}" - local port="${2}" - - local port_hex - port_hex="$(printf "%04X" "${port}")" - - local _ - for _ in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ - {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done + wait_local_port_listen "${@}" "tcp" } mptcp_lib_check_output() { @@ -438,17 +428,13 @@ mptcp_lib_check_tools() { } mptcp_lib_ns_init() { - local sec rndh - - sec=$(date +%s) - rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX) + if ! setup_ns "${@}"; then + mptcp_lib_pr_fail "Failed to setup namespaces ${*}" + exit ${KSFT_FAIL} + fi local netns for netns in "${@}"; do - eval "${netns}=${netns}-${rndh}" - - ip netns add "${!netns}" || exit ${KSFT_SKIP} - ip -net "${!netns}" link set lo up ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 @@ -456,9 +442,10 @@ mptcp_lib_ns_init() { } mptcp_lib_ns_exit() { + cleanup_ns "${@}" + local netns for netns in "${@}"; do - ip netns del "${netns}" rm -f /tmp/"${netns}".{nstat,out} done } diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9e2981f2d7f5..9cb05978269d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -160,10 +160,12 @@ make_connection() local is_v6=$1 local app_port=$app4_port local connect_addr="10.0.1.1" + local client_addr="10.0.1.2" local listen_addr="0.0.0.0" if [ "$is_v6" = "v6" ] then connect_addr="dead:beef:1::1" + client_addr="dead:beef:1::2" listen_addr="::" app_port=$app6_port else @@ -206,6 +208,7 @@ make_connection() [ "$server_serverside" = 1 ] then test_pass + print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" mptcp_lib_result_print_all_tap @@ -297,7 +300,7 @@ test_announce() ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" @@ -306,7 +309,7 @@ test_announce() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 - print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" @@ -316,7 +319,7 @@ test_announce() client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" + print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" @@ -327,7 +330,7 @@ test_announce() # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" @@ -336,7 +339,7 @@ test_announce() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" @@ -346,7 +349,7 @@ test_announce() server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" + print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" @@ -380,7 +383,7 @@ test_remove() local invalid_token=$(( client4_token - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ $client_addr_id > /dev/null 2>&1 - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" + print_test "RM_ADDR id:client ns2 => ns1, invalid token" local type type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] @@ -394,7 +397,7 @@ test_remove() local invalid_id=$(( client_addr_id + 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 - print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" + print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id" type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then @@ -407,7 +410,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -416,7 +419,7 @@ test_remove() client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -424,7 +427,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id - print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR6 id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" @@ -434,7 +437,7 @@ test_remove() # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -443,7 +446,7 @@ test_remove() server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -451,7 +454,7 @@ test_remove() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id - print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR6 id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } @@ -479,8 +482,14 @@ verify_subflow_events() local locid local remid local info + local e_dport_txt - info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" + # only display the fixed ports + if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then + e_dport_txt=":${e_dport}" + fi + + info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then @@ -766,7 +775,7 @@ test_subflows_v4_v6_mix() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ "$server_addr_id" "$app6_port" @@ -861,7 +870,7 @@ test_listener() local listener_pid=$! sleep 0.5 - print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + print_test "CREATE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -878,13 +887,14 @@ test_listener() mptcp_lib_kill_wait $listener_pid sleep 0.5 - print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + print_test "CLOSE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } print_title "Make connections" make_connection make_connection "v6" +print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)" test_announce test_remove diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index bdc03a2097e8..7ea5fb28c93d 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -85,6 +85,7 @@ static bool cfg_rx; static int cfg_runtime_ms = 4200; static int cfg_verbose; static int cfg_waittime_ms = 500; +static int cfg_notification_limit = 32; static bool cfg_zerocopy; static socklen_t cfg_alen; @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; static int zerocopied = -1; static uint32_t next_completion; +static uint32_t sends_since_notify; static unsigned long gettimeofday_ms(void) { @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) error(1, errno, "send"); if (cfg_verbose && ret != len) fprintf(stderr, "send: ret=%u != %u\n", ret, len); + sends_since_notify++; if (len) { packets++; @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain) /* Detect notification gaps. These should not happen often, if at all. * Gaps can occur due to drops, reordering and retransmissions. */ - if (lo != next_completion) + if (cfg_verbose && lo != next_completion) fprintf(stderr, "gap: %u..%u does not append to %u\n", lo, hi, next_completion); next_completion = hi + 1; @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain) static void do_recv_completions(int fd, int domain) { while (do_recv_completion(fd, domain)) {} + sends_since_notify = 0; } /* Wait for all remaining completions on the errqueue */ @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol) else do_sendmsg(fd, &msg, cfg_zerocopy, domain); + if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit) + do_recv_completions(fd, domain); + while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) do_recv_completions(fd, domain); @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv) if (cfg_ifindex == 0) error(1, errno, "invalid iface: %s", optarg); break; + case 'l': + cfg_notification_limit = strtoul(optarg, NULL, 0); + break; case 'm': cfg_cork_mixed = true; break; diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 6d66240e149c..47088b005390 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload" +BUGS="flush_remove_add reload net_port_proto_match" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -371,6 +371,22 @@ race_repeat 0 perf_duration 0 " +TYPE_net_port_proto_match=" +display net,port,proto +type_spec ipv4_addr . inet_service . inet_proto +chain_spec ip daddr . udp dport . meta l4proto +dst addr4 port proto +src +start 1 +count 9 +src_delta 9 +tools sendip bash +proto udp + +race_repeat 0 + +perf_duration 0 +" # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1555,6 +1571,64 @@ test_bug_reload() { nft flush ruleset } +# - add ranged element, check that packets match it +# - delete element again, check it is gone +test_bug_net_port_proto_match() { + setup veth send_"${proto}" set || return ${ksft_skip} + rstart=${start} + + range_size=1 + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + + nft "add element inet filter test { $elem }" || return 1 + nft "get element inet filter test { $elem }" | grep -q "$elem" + if [ $? -ne 0 ];then + local got=$(nft "get element inet filter test { $elem }") + err "post-add: should have returned $elem but got $got" + return 1 + fi + done + done + + # recheck after set was filled + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + + nft "get element inet filter test { $elem }" | grep -q "$elem" + if [ $? -ne 0 ];then + local got=$(nft "get element inet filter test { $elem }") + err "post-fill: should have returned $elem but got $got" + return 1 + fi + done + done + + # random del and re-fetch + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + local rnd=$((RANDOM%10)) + local got="" + + elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))") + if [ $rnd -gt 0 ];then + continue + fi + + nft "delete element inet filter test { $elem }" + got=$(nft "get element inet filter test { $elem }" 2>/dev/null) + if [ $? -eq 0 ];then + err "post-delete: query for $elem returned $got instead of error." + return 1 + fi + done + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 8538f08c64c2..c61d23a8c88d 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -375,6 +375,42 @@ EOF wait 2>/dev/null } +test_queue_removal() +{ + read tainted_then < /proc/sys/kernel/tainted + + ip netns exec "$ns1" nft -f - <<EOF +flush ruleset +table ip filter { + chain output { + type filter hook output priority 0; policy accept; + ip protocol icmp queue num 0 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 + + ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null + kill $nfqpid + + ip netns exec "$ns1" nft flush ruleset + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: queue program exiting while packets queued" + else + echo "TAINT: queue program exiting while packets queued" + ret=1 + fi +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -413,5 +449,6 @@ test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf +test_queue_removal exit $ret diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..45c34a3b9aae --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# This test checks that the network buffer sysctls are present +# in a network namespaces, and that they are readonly. + +source lib.sh + +cleanup() { + cleanup_ns $test_ns +} + +trap cleanup EXIT + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +setup_ns test_ns + +for sc in {r,w}mem_{default,max}; do + # check that this is writable in a netns + [ -w "/proc/sys/net/core/$sc" ] || + fail "$sc isn't writable in the init netns!" + + # change the value in the host netns + sysctl -qw "net.core.$sc=300000" || + fail "Can't write $sc in init netns!" + + # check that the value is read from the init netns + [ "$(ip netns exec $test_ns sysctl -n "net.core.$sc")" -eq 300000 ] || + fail "Value for $sc mismatch!" + + # check that this isn't writable in a netns + ip netns exec $test_ns [ -w "/proc/sys/net/core/$sc" ] && + fail "$sc is writable in a netns!" +done + +echo 'Test passed OK' diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 5cae53543849..cc0bfae2bafa 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # OVS kernel module self tests @@ -11,6 +11,11 @@ ksft_skip=4 PAUSE_ON_FAIL=no VERBOSE=0 TRACING=0 +WAIT_TIMEOUT=5 + +if test "X$KSFT_MACHINE_SLOW" == "Xyes"; then + WAIT_TIMEOUT=10 +fi tests=" arp_ping eth-arp: Basic arp ping between two NS @@ -20,10 +25,37 @@ tests=" nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces - drop_reason drop: test drop reasons are emitted" + drop_reason drop: test drop reasons are emitted + psample psample: Sampling packets with psample" info() { - [ $VERBOSE = 0 ] || echo $* + [ "${ovs_dir}" != "" ] && + echo "`date +"[%m-%d %H:%M:%S]"` $*" >> ${ovs_dir}/debug.log + [ $VERBOSE = 0 ] || echo $* +} + +ovs_wait() { + info "waiting $WAIT_TIMEOUT s for: $@" + + if "$@" ; then + info "wait succeeded immediately" + return 0 + fi + + # A quick re-check helps speed up small races in fast systems. + # However, fractional sleeps might not necessarily work. + local start=0 + sleep 0.1 || { sleep 1; start=1; } + + for (( i=start; i<WAIT_TIMEOUT; i++ )); do + if "$@" ; then + info "wait succeeded after $i seconds" + return 0 + fi + sleep 1 + done + info "wait failed after $i seconds" + return 1 } ovs_base=`pwd` @@ -65,7 +97,8 @@ ovs_setenv() { ovs_sbx() { if test "X$2" != X; then - (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log) + (ovs_setenv $1; shift; + info "run cmd: $@"; "$@" >> ${ovs_dir}/debug.log) else ovs_setenv $1 fi @@ -102,12 +135,21 @@ ovs_netns_spawn_daemon() { shift netns=$1 shift - info "spawning cmd: $*" - ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + if [ "$netns" == "_default" ]; then + $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + else + ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + fi pid=$! ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null" } +ovs_spawn_daemon() { + sbx=$1 + shift + ovs_netns_spawn_daemon $sbx "_default" $* +} + ovs_add_netns_and_veths () { info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}" ovs_sbx "$1" ip netns add "$3" || return 1 @@ -139,7 +181,7 @@ ovs_add_flow () { info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4" ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4" if [ $? -ne 0 ]; then - echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log + info "Flow [ $3 : $4 ] failed" return 1 fi return 0 @@ -170,6 +212,19 @@ ovs_drop_reason_count() return `echo "$perf_output" | grep "$pattern" | wc -l` } +ovs_test_flow_fails () { + ERR_MSG="Flow actions may not be safe on all matching packets" + + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow $@ &> /dev/null $@ && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + + if [ "$PRE_TEST" == "$POST_TEST" ]; then + return 1 + fi + return 0 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -184,6 +239,91 @@ usage() { exit 1 } + +# psample test +# - use psample to observe packets +test_psample() { + sbx_add "test_psample" || return $? + + # Add a datapath with per-vport dispatching. + ovs_add_dp "test_psample" psample -V 2:1 || return 1 + + info "create namespaces" + ovs_add_netns_and_veths "test_psample" "psample" \ + client c0 c1 172.31.110.10/24 -u || return 1 + ovs_add_netns_and_veths "test_psample" "psample" \ + server s0 s1 172.31.110.20/24 -u || return 1 + + # Check if psample actions can be configured. + ovs_add_flow "test_psample" psample \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'psample(group=1)' &> /dev/null + if [ $? == 1 ]; then + info "no support for psample - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_psample" psample + + # Test action verification. + OLDIFS=$IFS + IFS='*' + min_key='in_port(1),eth(),eth_type(0x0800),ipv4()' + for testcase in \ + "cookie to large"*"psample(group=1,cookie=1615141312111009080706050403020100)" \ + "no group with cookie"*"psample(cookie=abcd)" \ + "no group"*"psample()"; + do + set -- $testcase; + ovs_test_flow_fails "test_psample" psample $min_key $2 + if [ $? == 1 ]; then + info "failed - $1" + return 1 + fi + done + IFS=$OLDIFS + + ovs_del_flows "test_psample" psample + # Allow ARP + ovs_add_flow "test_psample" psample \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_psample" psample \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Sample first 14 bytes of all traffic. + ovs_add_flow "test_psample" psample \ + "in_port(1),eth(),eth_type(0x0800),ipv4()" \ + "trunc(14),psample(group=1,cookie=c0ffee),2" + + # Sample all traffic. In this case, use a sample() action with both + # psample and an upcall emulating simultaneous local sampling and + # sFlow / IPFIX. + nlpid=$(grep -E "listening on upcall packet handler" \ + $ovs_dir/s0.out | cut -d ":" -f 2 | tr -d ' ') + + ovs_add_flow "test_psample" psample \ + "in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "sample(sample=100%,actions(psample(group=2,cookie=eeff0c),userspace(pid=${nlpid},userdata=eeff0c))),1" + + # Record psample data. + ovs_spawn_daemon "test_psample" python3 $ovs_base/ovs-dpctl.py psample-events + ovs_wait grep -q "listening for psample events" ${ovs_dir}/stdout + + # Send a single ping. + ovs_sbx "test_psample" ip netns exec client ping -I c1 172.31.110.20 -c 1 || return 1 + + # We should have received one userspace action upcall and 2 psample packets. + ovs_wait grep -q "userspace action command" $ovs_dir/s0.out || return 1 + + # client -> server samples should only contain the first 14 bytes of the packet. + ovs_wait grep -qE "rate:4294967295,group:1,cookie:c0ffee data:[0-9a-f]{28}$" \ + $ovs_dir/stdout || return 1 + + ovs_wait grep -q "rate:4294967295,group:2,cookie:eeff0c" $ovs_dir/stdout || return 1 + + return 0 +} + # drop_reason test # - drop packets and verify the right drop reason is reported test_drop_reason() { @@ -599,7 +739,8 @@ test_upcall_interfaces() { ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \ 172.31.110.1/24 -u || return 1 - sleep 1 + ovs_wait grep -q "listening on upcall packet handler" ${ovs_dir}/left0.out + info "sending arping" ip netns exec upc arping -I l0 172.31.110.20 -c 1 \ >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr @@ -613,16 +754,20 @@ run_test() { tname="$1" tdesc="$2" - if ! lsmod | grep openvswitch >/dev/null 2>&1; then - stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" - return $ksft_skip - fi - if python3 ovs-dpctl.py -h 2>&1 | \ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi + + python3 ovs-dpctl.py show >/dev/null 2>&1 || \ + echo "[DPCTL] show exception." + + if ! lsmod | grep openvswitch >/dev/null 2>&1; then + stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" + return $ksft_skip + fi + printf "TEST: %-60s [START]\n" "${tname}" unset IFS diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 1dd057afd3fb..8a0396bfaf99 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -8,8 +8,10 @@ import argparse import errno import ipaddress import logging +import math import multiprocessing import re +import socket import struct import sys import time @@ -26,13 +28,16 @@ try: from pyroute2.netlink import genlmsg from pyroute2.netlink import nla from pyroute2.netlink import nlmsg_atoms + from pyroute2.netlink.event import EventSocket from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + from pyroute2.netlink.nlsocket import Marshal import pyroute2 + import pyroute2.iproute except ModuleNotFoundError: print("Need to install the python pyroute2 package >= 0.6.") - sys.exit(0) + sys.exit(1) OVS_DATAPATH_FAMILY = "ovs_datapath" @@ -58,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2 OVS_FLOW_CMD_GET = 3 OVS_FLOW_CMD_SET = 4 +UINT32_MAX = 0xFFFFFFFF def macstr(mac): outstr = ":".join(["%02X" % i for i in mac]) @@ -198,6 +204,18 @@ def convert_ipv4(data): return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask)) +def convert_ipv6(data): + ip, _, mask = data.partition('/') + + if not ip: + ip = mask = 0 + elif not mask: + mask = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' + elif mask.isdigit(): + mask = ipaddress.IPv6Network("::/" + mask).hostmask + + return ipaddress.IPv6Address(ip).packed, ipaddress.IPv6Address(mask).packed + def convert_int(size): def convert_int_sized(data): value, _, mask = data.partition('/') @@ -267,6 +285,75 @@ def parse_extract_field( return str_skipped, data +def parse_attrs(actstr, attr_desc): + """Parses the given action string and returns a list of netlink + attributes based on a list of attribute descriptions. + + Each element in the attribute description list is a tuple such as: + (name, attr_name, parse_func) + where: + name: is the string representing the attribute + attr_name: is the name of the attribute as defined in the uAPI. + parse_func: is a callable accepting a string and returning either + a single object (the parsed attribute value) or a tuple of + two values (the parsed attribute value and the remaining string) + + Returns a list of attributes and the remaining string. + """ + def parse_attr(actstr, key, func): + actstr = actstr[len(key) :] + + if not func: + return None, actstr + + delim = actstr[0] + actstr = actstr[1:] + + if delim == "=": + pos = strcspn(actstr, ",)") + ret = func(actstr[:pos]) + else: + ret = func(actstr) + + if isinstance(ret, tuple): + (datum, actstr) = ret + else: + datum = ret + actstr = actstr[strcspn(actstr, ",)"):] + + if delim == "(": + if not actstr or actstr[0] != ")": + raise ValueError("Action contains unbalanced parentheses") + + actstr = actstr[1:] + + actstr = actstr[strspn(actstr, ", ") :] + + return datum, actstr + + attrs = [] + attr_desc = list(attr_desc) + while actstr and actstr[0] != ")" and attr_desc: + found = False + for i, (key, attr, func) in enumerate(attr_desc): + if actstr.startswith(key): + datum, actstr = parse_attr(actstr, key, func) + attrs.append([attr, datum]) + found = True + del attr_desc[i] + + if not found: + raise ValueError("Unknown attribute: '%s'" % actstr) + + actstr = actstr[strspn(actstr, ", ") :] + + if actstr[0] != ")": + raise ValueError("Action string contains extra garbage or has " + "unbalanced parenthesis: '%s'" % actstr) + + return attrs, actstr[1:] + + class ovs_dp_msg(genlmsg): # include the OVS version # We need a custom header rather than just being able to rely on @@ -282,15 +369,15 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_UNSPEC", "none"), ("OVS_ACTION_ATTR_OUTPUT", "uint32"), ("OVS_ACTION_ATTR_USERSPACE", "userspace"), - ("OVS_ACTION_ATTR_SET", "none"), + ("OVS_ACTION_ATTR_SET", "ovskey"), ("OVS_ACTION_ATTR_PUSH_VLAN", "none"), ("OVS_ACTION_ATTR_POP_VLAN", "flag"), - ("OVS_ACTION_ATTR_SAMPLE", "none"), + ("OVS_ACTION_ATTR_SAMPLE", "sample"), ("OVS_ACTION_ATTR_RECIRC", "uint32"), ("OVS_ACTION_ATTR_HASH", "none"), ("OVS_ACTION_ATTR_PUSH_MPLS", "none"), ("OVS_ACTION_ATTR_POP_MPLS", "flag"), - ("OVS_ACTION_ATTR_SET_MASKED", "none"), + ("OVS_ACTION_ATTR_SET_MASKED", "ovskey"), ("OVS_ACTION_ATTR_CT", "ctact"), ("OVS_ACTION_ATTR_TRUNC", "uint32"), ("OVS_ACTION_ATTR_PUSH_ETH", "none"), @@ -304,8 +391,85 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), ("OVS_ACTION_ATTR_DROP", "uint32"), + ("OVS_ACTION_ATTR_PSAMPLE", "psample"), ) + class psample(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_PSAMPLE_ATTR_UNSPEC", "none"), + ("OVS_PSAMPLE_ATTR_GROUP", "uint32"), + ("OVS_PSAMPLE_ATTR_COOKIE", "array(uint8)"), + ) + + def dpstr(self, more=False): + args = "group=%d" % self.get_attr("OVS_PSAMPLE_ATTR_GROUP") + + cookie = self.get_attr("OVS_PSAMPLE_ATTR_COOKIE") + if cookie: + args += ",cookie(%s)" % \ + "".join(format(x, "02x") for x in cookie) + + return "psample(%s)" % args + + def parse(self, actstr): + desc = ( + ("group", "OVS_PSAMPLE_ATTR_GROUP", int), + ("cookie", "OVS_PSAMPLE_ATTR_COOKIE", + lambda x: list(bytearray.fromhex(x))) + ) + + attrs, actstr = parse_attrs(actstr, desc) + + for attr in attrs: + self["attrs"].append(attr) + + return actstr + + class sample(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_SAMPLE_ATTR_UNSPEC", "none"), + ("OVS_SAMPLE_ATTR_PROBABILITY", "uint32"), + ("OVS_SAMPLE_ATTR_ACTIONS", "ovsactions"), + ) + + def dpstr(self, more=False): + args = [] + + args.append("sample={:.2f}%".format( + 100 * self.get_attr("OVS_SAMPLE_ATTR_PROBABILITY") / + UINT32_MAX)) + + actions = self.get_attr("OVS_SAMPLE_ATTR_ACTIONS") + if actions: + args.append("actions(%s)" % actions.dpstr(more)) + + return "sample(%s)" % ",".join(args) + + def parse(self, actstr): + def parse_nested_actions(actstr): + subacts = ovsactions() + parsed_len = subacts.parse(actstr) + return subacts, actstr[parsed_len :] + + def percent_to_rate(percent): + percent = float(percent.strip('%')) + return int(math.floor(UINT32_MAX * (percent / 100.0) + .5)) + + desc = ( + ("sample", "OVS_SAMPLE_ATTR_PROBABILITY", percent_to_rate), + ("actions", "OVS_SAMPLE_ATTR_ACTIONS", parse_nested_actions), + ) + attrs, actstr = parse_attrs(actstr, desc) + + for attr in attrs: + self["attrs"].append(attr) + + return actstr + class ctact(nla): nla_flags = NLA_F_NESTED @@ -427,50 +591,77 @@ class ovsactions(nla): print_str += "userdata=" for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"): print_str += "%x." % f - if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None: + if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None: print_str += "egress_tun_port=%d" % self.get_attr( - "OVS_USERSPACE_ATTR_TUN_PORT" + "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT" ) print_str += ")" return print_str + def parse(self, actstr): + attrs_desc = ( + ("pid", "OVS_USERSPACE_ATTR_PID", int), + ("userdata", "OVS_USERSPACE_ATTR_USERDATA", + lambda x: list(bytearray.fromhex(x))), + ("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int) + ) + + attrs, actstr = parse_attrs(actstr, attrs_desc) + for attr in attrs: + self["attrs"].append(attr) + + return actstr + def dpstr(self, more=False): print_str = "" - for field in self.nla_map: + for field in self["attrs"]: if field[1] == "none" or self.get_attr(field[0]) is None: continue if print_str != "": print_str += "," - if field[1] == "uint32": - if field[0] == "OVS_ACTION_ATTR_OUTPUT": - print_str += "%d" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_RECIRC": - print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_TRUNC": - print_str += "trunc(%d)" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_DROP": - print_str += "drop(%d)" % int(self.get_attr(field[0])) - elif field[1] == "flag": - if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": - print_str += "ct_clear" - elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": - print_str += "pop_vlan" - elif field[0] == "OVS_ACTION_ATTR_POP_ETH": - print_str += "pop_eth" - elif field[0] == "OVS_ACTION_ATTR_POP_NSH": - print_str += "pop_nsh" - elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": - print_str += "pop_mpls" + if field[0] == "OVS_ACTION_ATTR_OUTPUT": + print_str += "%d" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_RECIRC": + print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_TRUNC": + print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_CT_CLEAR": + print_str += "ct_clear" + elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": + print_str += "pop_vlan" + elif field[0] == "OVS_ACTION_ATTR_POP_ETH": + print_str += "pop_eth" + elif field[0] == "OVS_ACTION_ATTR_POP_NSH": + print_str += "pop_nsh" + elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": + print_str += "pop_mpls" else: datum = self.get_attr(field[0]) if field[0] == "OVS_ACTION_ATTR_CLONE": print_str += "clone(" print_str += datum.dpstr(more) print_str += ")" + elif field[0] == "OVS_ACTION_ATTR_SET" or \ + field[0] == "OVS_ACTION_ATTR_SET_MASKED": + print_str += "set" + field = datum + mask = None + if field[0] == "OVS_ACTION_ATTR_SET_MASKED": + print_str += "_masked" + field = datum[0] + mask = datum[1] + print_str += "(" + print_str += field.dpstr(mask, more) + print_str += ")" else: - print_str += datum.dpstr(more) + try: + print_str += datum.dpstr(more) + except: + print_str += "{ATTR: %s not decoded}" % field[0] return print_str @@ -531,7 +722,7 @@ class ovsactions(nla): for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): actstr = actstr[len(flat_act[0]):] - self["attrs"].append([flat_act[1]]) + self["attrs"].append([flat_act[1], True]) actstr = actstr[strspn(actstr, ", ") :] parsed = True @@ -544,6 +735,25 @@ class ovsactions(nla): self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts)) actstr = actstr[parsedLen:] parsed = True + elif parse_starts_block(actstr, "set(", False): + parencount += 1 + k = ovskey() + actstr = actstr[len("set("):] + actstr = k.parse(actstr, None) + self["attrs"].append(("OVS_ACTION_ATTR_SET", k)) + if not actstr.startswith(")"): + actstr = ")" + actstr + parsed = True + elif parse_starts_block(actstr, "set_masked(", False): + parencount += 1 + k = ovskey() + m = ovskey() + actstr = actstr[len("set_masked("):] + actstr = k.parse(actstr, m) + self["attrs"].append(("OVS_ACTION_ATTR_SET_MASKED", [k, m])) + if not actstr.startswith(")"): + actstr = ")" + actstr + parsed = True elif parse_starts_block(actstr, "ct(", False): parencount += 1 actstr = actstr[len("ct(") :] @@ -637,6 +847,37 @@ class ovsactions(nla): self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) parsed = True + elif parse_starts_block(actstr, "sample(", False): + sampleact = self.sample() + actstr = sampleact.parse(actstr[len("sample(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_SAMPLE", sampleact]) + parsed = True + + elif parse_starts_block(actstr, "psample(", False): + psampleact = self.psample() + actstr = psampleact.parse(actstr[len("psample(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_PSAMPLE", psampleact]) + parsed = True + + elif parse_starts_block(actstr, "userspace(", False): + uact = self.userspace() + actstr = uact.parse(actstr[len("userspace(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact]) + parsed = True + + elif parse_starts_block(actstr, "trunc(", False): + parencount += 1 + actstr, val = parse_extract_field( + actstr, + "trunc(", + r"([0-9]+)", + int, + False, + None, + ) + self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val]) + parsed = True + actstr = actstr[strspn(actstr, ", ") :] while parencount > 0: parencount -= 1 @@ -675,7 +916,7 @@ class ovskey(nla): ("OVS_KEY_ATTR_ARP", "ovs_key_arp"), ("OVS_KEY_ATTR_ND", "ovs_key_nd"), ("OVS_KEY_ATTR_SKB_MARK", "uint32"), - ("OVS_KEY_ATTR_TUNNEL", "none"), + ("OVS_KEY_ATTR_TUNNEL", "ovs_key_tunnel"), ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"), ("OVS_KEY_ATTR_TCP_FLAGS", "be16"), ("OVS_KEY_ATTR_DP_HASH", "uint32"), @@ -907,21 +1148,21 @@ class ovskey(nla): "src", "src", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), - lambda x: ipaddress.IPv6Address(x), + lambda x: ipaddress.IPv6Address(x).packed if x else 0, + convert_ipv6, ), ( "dst", "dst", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), - lambda x: ipaddress.IPv6Address(x), + lambda x: ipaddress.IPv6Address(x).packed if x else 0, + convert_ipv6, ), - ("label", "label", "%d", int), - ("proto", "proto", "%d", int), - ("tclass", "tclass", "%d", int), - ("hlimit", "hlimit", "%d", int), - ("frag", "frag", "%d", int), + ("label", "label", "%d", lambda x: int(x) if x else 0), + ("proto", "proto", "%d", lambda x: int(x) if x else 0), + ("tclass", "tclass", "%d", lambda x: int(x) if x else 0), + ("hlimit", "hlimit", "%d", lambda x: int(x) if x else 0), + ("frag", "frag", "%d", lambda x: int(x) if x else 0), ) def __init__( @@ -1119,7 +1360,7 @@ class ovskey(nla): "target", "target", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), + convert_ipv6, ), ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")), ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")), @@ -1204,13 +1445,13 @@ class ovskey(nla): "src", "src", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big", convertmac), + convert_ipv6, ), ( "dst", "dst", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), + convert_ipv6, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), @@ -1235,6 +1476,163 @@ class ovskey(nla): init=init, ) + class ovs_key_tunnel(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_TUNNEL_KEY_ATTR_ID", "be64"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_DST", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_TOS", "uint8"), + ("OVS_TUNNEL_KEY_ATTR_TTL", "uint8"), + ("OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", "flag"), + ("OVS_TUNNEL_KEY_ATTR_CSUM", "flag"), + ("OVS_TUNNEL_KEY_ATTR_OAM", "flag"), + ("OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS", "array(uint32)"), + ("OVS_TUNNEL_KEY_ATTR_TP_SRC", "be16"), + ("OVS_TUNNEL_KEY_ATTR_TP_DST", "be16"), + ("OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS", "none"), + ("OVS_TUNNEL_KEY_ATTR_IPV6_SRC", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_IPV6_DST", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_PAD", "none"), + ("OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS", "none"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE", "flag"), + ) + + def parse(self, flowstr, mask=None): + if not flowstr.startswith("tunnel("): + return None, None + + k = ovskey.ovs_key_tunnel() + if mask is not None: + mask = ovskey.ovs_key_tunnel() + + flowstr = flowstr[len("tunnel("):] + + v6_address = None + + fields = [ + ("tun_id=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_ID", + 0xffffffffffffffff, None, None), + + ("src=", r"([0-9a-fA-F\.]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "255.255.255.255", "0.0.0.0", + False), + ("dst=", r"([0-9a-fA-F\.]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV4_DST", "255.255.255.255", "0.0.0.0", + False), + + ("ipv6_src=", r"([0-9a-fA-F:]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV6_SRC", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True), + ("ipv6_dst=", r"([0-9a-fA-F:]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV6_DST", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True), + + ("tos=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TOS", 255, 0, + None), + ("ttl=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TTL", 255, 0, + None), + + ("tp_src=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_SRC", + 65535, 0, None), + ("tp_dst=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_DST", + 65535, 0, None), + ] + + forced_include = ["OVS_TUNNEL_KEY_ATTR_TTL"] + + for prefix, regex, typ, attr_name, mask_val, default_val, v46_flag in fields: + flowstr, value = parse_extract_field(flowstr, prefix, regex, typ, False) + if not attr_name: + raise Exception("Bad list value in tunnel fields") + + if value is None and attr_name in forced_include: + value = default_val + mask_val = default_val + + if value is not None: + if v46_flag is not None: + if v6_address is None: + v6_address = v46_flag + if v46_flag != v6_address: + raise ValueError("Cannot mix v6 and v4 addresses") + k["attrs"].append([attr_name, value]) + if mask is not None: + mask["attrs"].append([attr_name, mask_val]) + else: + if v46_flag is not None: + if v6_address is None or v46_flag != v6_address: + continue + if mask is not None: + mask["attrs"].append([attr_name, default_val]) + + if k["attrs"][0][0] != "OVS_TUNNEL_KEY_ATTR_ID": + raise ValueError("Needs a tunid set") + + if flowstr.startswith("flags("): + flowstr = flowstr[len("flags("):] + flagspos = flowstr.find(")") + flags = flowstr[:flagspos] + flowstr = flowstr[flagspos + 1:] + + flag_attrs = { + "df": "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", + "csum": "OVS_TUNNEL_KEY_ATTR_CSUM", + "oam": "OVS_TUNNEL_KEY_ATTR_OAM" + } + + for flag in flags.split("|"): + if flag in flag_attrs: + k["attrs"].append([flag_attrs[flag], True]) + if mask is not None: + mask["attrs"].append([flag_attrs[flag], True]) + + flowstr = flowstr[strspn(flowstr, ", ") :] + return flowstr, k, mask + + def dpstr(self, mask=None, more=False): + print_str = "tunnel(" + + flagsattrs = [] + for k in self["attrs"]: + noprint = False + if k[0] == "OVS_TUNNEL_KEY_ATTR_ID": + print_str += "tun_id=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_SRC": + print_str += "src=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_DST": + print_str += "dst=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_SRC": + print_str += "ipv6_src=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_DST": + print_str += "ipv6_dst=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TOS": + print_str += "tos=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TTL": + print_str += "ttl=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_SRC": + print_str += "tp_src=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_DST": + print_str += "tp_dst=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT": + noprint = True + flagsattrs.append("df") + elif k[0] == "OVS_TUNNEL_KEY_ATTR_CSUM": + noprint = True + flagsattrs.append("csum") + elif k[0] == "OVS_TUNNEL_KEY_ATTR_OAM": + noprint = True + flagsattrs.append("oam") + + if not noprint: + print_str += "," + + if len(flagsattrs): + print_str += "flags(" + "|".join(flagsattrs) + ")" + print_str += ")" + return print_str + class ovs_key_mpls(nla): fields = (("lse", ">I"),) @@ -1243,6 +1641,7 @@ class ovskey(nla): ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse), ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse), + ("OVS_KEY_ATTR_TUNNEL", "tunnel", ovskey.ovs_key_tunnel), ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse), ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state), ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse), @@ -1309,7 +1708,7 @@ class ovskey(nla): mask["attrs"].append([field[0], m]) self["attrs"].append([field[0], k]) - flowstr = flowstr[strspn(flowstr, "),") :] + flowstr = flowstr[strspn(flowstr, "), ") :] return flowstr @@ -1346,6 +1745,13 @@ class ovskey(nla): True, ), ( + "OVS_KEY_ATTR_TUNNEL", + "tunnel", + None, + False, + False, + ), + ( "OVS_KEY_ATTR_CT_STATE", "ct_state", "0x%04x", @@ -1617,7 +2023,7 @@ class OvsVport(GenericNetlinkSocket): ("OVS_VPORT_ATTR_PORT_NO", "uint32"), ("OVS_VPORT_ATTR_TYPE", "uint32"), ("OVS_VPORT_ATTR_NAME", "asciiz"), - ("OVS_VPORT_ATTR_OPTIONS", "none"), + ("OVS_VPORT_ATTR_OPTIONS", "vportopts"), ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"), ("OVS_VPORT_ATTR_STATS", "vportstats"), ("OVS_VPORT_ATTR_PAD", "none"), @@ -1625,6 +2031,13 @@ class OvsVport(GenericNetlinkSocket): ("OVS_VPORT_ATTR_NETNSID", "uint32"), ) + class vportopts(nla): + nla_map = ( + ("OVS_TUNNEL_ATTR_UNSPEC", "none"), + ("OVS_TUNNEL_ATTR_DST_PORT", "uint16"), + ("OVS_TUNNEL_ATTR_EXTENSION", "none"), + ) + class vportstats(nla): fields = ( ("rx_packets", "=Q"), @@ -1693,7 +2106,7 @@ class OvsVport(GenericNetlinkSocket): raise ne return reply - def attach(self, dpindex, vport_ifname, ptype): + def attach(self, dpindex, vport_ifname, ptype, dport, lwt): msg = OvsVport.ovs_vport_msg() msg["cmd"] = OVS_VPORT_CMD_NEW @@ -1702,12 +2115,43 @@ class OvsVport(GenericNetlinkSocket): msg["dpifindex"] = dpindex port_type = OvsVport.str_to_type(ptype) - msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname]) msg["attrs"].append( ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]] ) + TUNNEL_DEFAULTS = [("geneve", 6081), + ("vxlan", 4789)] + + for tnl in TUNNEL_DEFAULTS: + if ptype == tnl[0]: + if not dport: + dport = tnl[1] + + if not lwt: + vportopt = OvsVport.ovs_vport_msg.vportopts() + vportopt["attrs"].append( + ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)] + ) + msg["attrs"].append( + ["OVS_VPORT_ATTR_OPTIONS", vportopt] + ) + else: + port_type = OvsVport.OVS_VPORT_TYPE_NETDEV + ipr = pyroute2.iproute.IPRoute() + + if tnl[0] == "geneve": + ipr.link("add", ifname=vport_ifname, kind=tnl[0], + geneve_port=dport, + geneve_collect_metadata=True, + geneve_udp_zero_csum6_rx=1) + elif tnl[0] == "vxlan": + ipr.link("add", ifname=vport_ifname, kind=tnl[0], + vxlan_learning=0, vxlan_collect_metadata=1, + vxlan_udp_zero_csum6_rx=1, vxlan_port=dport) + break + msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) + try: reply = self.nlm_request( msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK @@ -2018,10 +2462,71 @@ class OvsFlow(GenericNetlinkSocket): print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True) def execute(self, packetmsg): - print("userspace execute command") + print("userspace execute command", flush=True) def action(self, packetmsg): - print("userspace action command") + print("userspace action command", flush=True) + + +class psample_sample(genlmsg): + nla_map = ( + ("PSAMPLE_ATTR_IIFINDEX", "none"), + ("PSAMPLE_ATTR_OIFINDEX", "none"), + ("PSAMPLE_ATTR_ORIGSIZE", "none"), + ("PSAMPLE_ATTR_SAMPLE_GROUP", "uint32"), + ("PSAMPLE_ATTR_GROUP_SEQ", "none"), + ("PSAMPLE_ATTR_SAMPLE_RATE", "uint32"), + ("PSAMPLE_ATTR_DATA", "array(uint8)"), + ("PSAMPLE_ATTR_GROUP_REFCOUNT", "none"), + ("PSAMPLE_ATTR_TUNNEL", "none"), + ("PSAMPLE_ATTR_PAD", "none"), + ("PSAMPLE_ATTR_OUT_TC", "none"), + ("PSAMPLE_ATTR_OUT_TC_OCC", "none"), + ("PSAMPLE_ATTR_LATENCY", "none"), + ("PSAMPLE_ATTR_TIMESTAMP", "none"), + ("PSAMPLE_ATTR_PROTO", "none"), + ("PSAMPLE_ATTR_USER_COOKIE", "array(uint8)"), + ) + + def dpstr(self): + fields = [] + data = "" + for (attr, value) in self["attrs"]: + if attr == "PSAMPLE_ATTR_SAMPLE_GROUP": + fields.append("group:%d" % value) + if attr == "PSAMPLE_ATTR_SAMPLE_RATE": + fields.append("rate:%d" % value) + if attr == "PSAMPLE_ATTR_USER_COOKIE": + value = "".join(format(x, "02x") for x in value) + fields.append("cookie:%s" % value) + if attr == "PSAMPLE_ATTR_DATA" and len(value) > 0: + data = "data:%s" % "".join(format(x, "02x") for x in value) + + return ("%s %s" % (",".join(fields), data)).strip() + + +class psample_msg(Marshal): + PSAMPLE_CMD_SAMPLE = 0 + PSAMPLE_CMD_GET_GROUP = 1 + PSAMPLE_CMD_NEW_GROUP = 2 + PSAMPLE_CMD_DEL_GROUP = 3 + PSAMPLE_CMD_SET_FILTER = 4 + msg_map = {PSAMPLE_CMD_SAMPLE: psample_sample} + + +class PsampleEvent(EventSocket): + genl_family = "psample" + mcast_groups = ["packets"] + marshal_class = psample_msg + + def read_samples(self): + print("listening for psample events", flush=True) + while True: + try: + for msg in self.get(): + print(msg.dpstr(), flush=True) + except NetlinkError as ne: + raise ne def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): @@ -2053,12 +2558,19 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): for iface in ndb.interfaces: rep = vpl.info(iface.ifname, ifindex) if rep is not None: + opts = "" + vpo = rep.get_attr("OVS_VPORT_ATTR_OPTIONS") + if vpo: + dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT") + if dpo: + opts += " tnl-dport:%s" % socket.ntohs(dpo) print( - " port %d: %s (%s)" + " port %d: %s (%s%s)" % ( rep.get_attr("OVS_VPORT_ATTR_PORT_NO"), rep.get_attr("OVS_VPORT_ATTR_NAME"), OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")), + opts, ) ) @@ -2081,7 +2593,7 @@ def main(argv): help="Increment 'verbose' output counter.", default=0, ) - subparsers = parser.add_subparsers() + subparsers = parser.add_subparsers(dest="subcommand") showdpcmd = subparsers.add_parser("show") showdpcmd.add_argument( @@ -2120,12 +2632,30 @@ def main(argv): "--ptype", type=str, default="netdev", - choices=["netdev", "internal"], + choices=["netdev", "internal", "geneve", "vxlan"], help="Interface type (default netdev)", ) + addifcmd.add_argument( + "-p", + "--dport", + type=int, + default=0, + help="Destination port (0 for default)" + ) + addifcmd.add_argument( + "-l", + "--lwt", + type=bool, + default=True, + help="Use LWT infrastructure instead of vport (default true)." + ) delifcmd = subparsers.add_parser("del-if") delifcmd.add_argument("dpname", help="Datapath Name") delifcmd.add_argument("delif", help="Interface name for adding") + delifcmd.add_argument("-d", + "--dellink", + type=bool, default=False, + help="Delete the link as well.") dumpflcmd = subparsers.add_parser("dump-flows") dumpflcmd.add_argument("dumpdp", help="Datapath Name") @@ -2138,6 +2668,8 @@ def main(argv): delfscmd = subparsers.add_parser("del-flows") delfscmd.add_argument("flsbr", help="Datapath name") + subparsers.add_parser("psample-events") + args = parser.parse_args() if args.verbose > 0: @@ -2152,6 +2684,9 @@ def main(argv): sys.setrecursionlimit(100000) + if args.subcommand == "psample-events": + PsampleEvent().read_samples() + if hasattr(args, "showdp"): found = False for iface in ndb.interfaces: @@ -2186,7 +2721,8 @@ def main(argv): print("DP '%s' not found." % args.dpname) return 1 dpindex = rep["dpifindex"] - rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype) + rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype, + args.dport, args.lwt) msg = "vport '%s'" % args.addif if rep and rep["header"]["error"] is None: msg += " added." @@ -2207,6 +2743,9 @@ def main(argv): msg += " removed." else: msg += " failed to remove." + if args.dellink: + ipr = pyroute2.iproute.IPRoute() + ipr.link("del", index=ipr.link_lookup(ifname=args.delif)[0]) elif hasattr(args, "dumpdp"): rep = ovsdp.info(args.dumpdp, 0) if rep is None: diff --git a/tools/testing/selftests/net/openvswitch/settings b/tools/testing/selftests/net/openvswitch/settings new file mode 100644 index 000000000000..e2206265f67c --- /dev/null +++ b/tools/testing/selftests/net/openvswitch/settings @@ -0,0 +1 @@ +timeout=900 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index cfc84958025a..5175c0c83a23 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -842,25 +842,97 @@ setup_bridge() { run_cmd ${ns_a} ip link set veth_A-C master br0 } +setup_ovs_via_internal_utility() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + dport="${4}" + + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t ${type} || return 1 + + ports=$(python3 ./openvswitch/ovs-dpctl.py show) + br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | cut -d: -f1 | xargs) + type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut -d: -f1 | xargs) + veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut -d: -f1 | xargs) + + v4_a_tun="${prefix4}.${a_r1}.1" + v4_b_tun="${prefix4}.${b_r1}.1" + + v6_a_tun="${prefix6}:${a_r1}::1" + v6_b_tun="${prefix6}:${b_r1}::1" + + if [ "${v4_a_tun}" = "${a_addr}" ]; then + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + else + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + fi +} + +setup_ovs_via_vswitchd() { + type="${1}" + b_addr="${2}" + + run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ + set interface ${type}_a type=${type} \ + options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 +} + setup_ovs_vxlan_or_geneve() { type="${1}" a_addr="${2}" b_addr="${3}" + dport="6081" if [ "${type}" = "vxlan" ]; then + dport="4789" opts="${opts} ttl 64 dstport 4789" opts_b="local ${b_addr}" fi - run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ - set interface ${type}_a type=${type} \ - options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 + setup_ovs_via_internal_utility "${type}" "${a_addr}" "${b_addr}" \ + "${dport}" || \ + setup_ovs_via_vswitchd "${type}" "${b_addr}" || return 1 run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1 run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + run_cmd ip link set ${type}_a up run_cmd ${ns_b} ip link set ${type}_b up } @@ -880,8 +952,24 @@ setup_ovs_vxlan6() { setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 } +setup_ovs_br_internal() { + run_cmd python3 ./openvswitch/ovs-dpctl.py add-dp ovs_br0 || \ + return 1 +} + +setup_ovs_br_vswitchd() { + run_cmd ovs-vsctl add-br ovs_br0 || return 1 +} + +setup_ovs_add_if() { + ifname="${1}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 \ + "${ifname}" || \ + run_cmd ovs-vsctl add-port ovs_br0 "${ifname}" +} + setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip + setup_ovs_br_internal || setup_ovs_br_vswitchd || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -891,7 +979,7 @@ setup_ovs_bridge() { run_cmd ${ns_c} ip link set veth_C-A up run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A - run_cmd ovs-vsctl add-port ovs_br0 veth_A-C + setup_ovs_add_if veth_A-C # Move veth_A-R1 to init run_cmd ${ns_a} ip link set veth_A-R1 netns 1 @@ -922,6 +1010,18 @@ trace() { sleep 1 } +cleanup_del_ovs_internal() { + # squelch the output of the del-if commands since it can be wordy + python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true vxlan_a >/dev/null 2>&1 + python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true geneve_a >/dev/null 2>&1 + python3 ./openvswitch/ovs-dpctl.py del-dp ovs_br0 >/dev/null 2>&1 +} + +cleanup_del_ovs_vswitchd() { + ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null + ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null +} + cleanup() { for pid in ${tcpdump_pids}; do kill ${pid} @@ -940,10 +1040,10 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null - ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null + ip link del veth_A-C 2>/dev/null + ip link del veth_A-R1 2>/dev/null + cleanup_del_ovs_internal + cleanup_del_ovs_vswitchd rm -f "$tmpoutfile" } @@ -1397,6 +1497,12 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { outer_family=${3} ll_mtu=4000 + if [ "${type}" = "vxlan" ]; then + tun_a="vxlan_sys_4789" + elif [ "${type}" = "geneve" ]; then + tun_a="genev_sys_6081" + fi + if [ ${outer_family} -eq 4 ]; then setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header @@ -1407,17 +1513,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi - if [ "${type}" = "vxlan" ]; then - tun_a="vxlan_sys_4789" - elif [ "${type}" = "geneve" ]; then - tun_a="genev_sys_6081" - fi - - trace "" "${tun_a}" "${ns_b}" ${type}_b \ - "" veth_A-R1 "${ns_r1}" veth_R1-A \ - "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ - "" ovs_br0 "" veth-A-C \ - "${ns_c}" veth_C-A + trace "" ${type}_a "${ns_b}" ${type}_b \ + "" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "" ovs_br0 "" veth-A_C \ + "${ns_c}" veth_C-A "" "${tun_a}" if [ ${family} -eq 4 ]; then ping=ping @@ -1436,8 +1536,9 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { mtu "${ns_b}" veth_B-R1 ${ll_mtu} mtu "${ns_r1}" veth_R1-B ${ll_mtu} - mtu "" ${tun_a} $((${ll_mtu} + 1000)) - mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + mtu "" ${tun_a} $((${ll_mtu} + 1000)) 2>/dev/null || \ + mtu "" ${type}_a $((${ll_mtu} + 1000)) 2>/dev/null + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1 diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh new file mode 100755 index 000000000000..e23210aa547f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh @@ -0,0 +1,335 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv4 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100| +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv4_HS_NETWORK=10.0.0 +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh new file mode 100755 index 000000000000..9e69a2ed5bc3 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv6 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 | +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv6_HS_NETWORK=cafe +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 522d991e310e..bd88b90b902b 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -26,7 +26,7 @@ LIB := $(LIBDIR)/libaotst.a LDLIBS += $(LIB) -pthread LIBDEPS := lib/aolib.h Makefile -CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += -Wall -O2 -g -fno-strict-aliasing CFLAGS += $(KHDR_INCLUDES) CFLAGS += -iquote ./lib/ -I ../../../../include/ diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index e154d9e198a9..a5698b0a3718 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -30,8 +30,6 @@ static void setup_lo_intf(const char *lo_intf) static void tcp_self_connect(const char *tst, unsigned int port, bool different_keyids, bool check_restore) { - uint64_t before_challenge_ack, after_challenge_ack; - uint64_t before_syn_challenge, after_syn_challenge; struct tcp_ao_counters before_ao, after_ao; uint64_t before_aogood, after_aogood; struct netstat *ns_before, *ns_after; @@ -62,8 +60,6 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); - before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); if (test_get_tcp_ao_counters(sk, &before_ao)) test_error("test_get_tcp_ao_counters()"); @@ -82,8 +78,6 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); - after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); if (test_get_tcp_ao_counters(sk, &after_ao)) test_error("test_get_tcp_ao_counters()"); if (!check_restore) { @@ -98,18 +92,6 @@ static void tcp_self_connect(const char *tst, unsigned int port, close(sk); return; } - if (after_challenge_ack <= before_challenge_ack || - after_syn_challenge <= before_syn_challenge) { - /* - * It's also meant to test simultaneous open, so check - * these counters as well. - */ - test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", - tst, after_challenge_ack, before_challenge_ack, - after_syn_challenge, before_syn_challenge); - close(sk); - return; - } if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { close(sk); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 85b3baa3f7f3..3e74cfa1a2bf 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -53,6 +53,7 @@ static bool cfg_do_ipv6; static bool cfg_do_connected; static bool cfg_do_connectionless; static bool cfg_do_msgmore; +static bool cfg_do_recv = true; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; @@ -414,6 +415,9 @@ static void run_one(struct testcase *test, int fdt, int fdr, if (!sent) return; + if (!cfg_do_recv) + return; + if (test->gso_len) mss = test->gso_len; else @@ -464,8 +468,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) if (fdr == -1) error(1, errno, "socket r"); - if (bind(fdr, addr, alen)) - error(1, errno, "bind"); + if (cfg_do_recv) { + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + } /* Have tests fail quickly instead of hang */ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) @@ -524,7 +530,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + while ((c = getopt(argc, argv, "46cCmRst:")) != -1) { switch (c) { case '4': cfg_do_ipv4 = true; @@ -541,6 +547,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_do_msgmore = true; break; + case 'R': + cfg_do_recv = false; + break; case 's': cfg_do_setsockopt = true; break; diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index 6c63178086b0..85d1fa3c1ff7 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -27,6 +27,31 @@ test_route_mtu() { ip route add local fd00::1/128 table local dev lo mtu 1500 } +setup_dummy_sink() { + ip link add name sink mtu 1500 type dummy + ip addr add dev sink 10.0.0.0/24 + ip addr add dev sink fd00::2/64 nodad + ip link set dev sink up +} + +test_hw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation on >/dev/null +} + +test_sw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + +test_sw_gso_sw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic off >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + if [ "$#" -gt 0 ]; then "$1" shift 2 # pop "test_*" arg and "--" delimiter @@ -56,3 +81,21 @@ echo "ipv4 msg_more" echo "ipv6 msg_more" ./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m + +echo "ipv4 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 2da32f4c479b..152171fb1fc8 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -59,6 +59,7 @@ # while it is forwarded between different vrfs. source lib.sh +PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -533,6 +534,86 @@ ipv6_ping_frag_asym() ipv6_ping_frag asym } +ipv4_ping_local() +{ + log_section "IPv4 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity || return + + run_cmd ip netns exec $r1 ip vrf exec blue ping -c1 -w1 ${H2_N2_IP} + log_test $? 0 "VRF ICMP local IPv4" +} + +ipv4_tcp_local() +{ + log_section "IPv4 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF tcp local connection IPv4" +} + +ipv4_udp_local() +{ + log_section "IPv4 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -D -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -D -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF udp local connection IPv4" +} + +ipv6_ping_local() +{ + log_section "IPv6 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity6 || return + + run_cmd ip netns exec $r1 ip vrf exec blue ${ping6} -c1 -w1 ${H2_N2_IP6} + log_test $? 0 "VRF ICMP local IPv6" +} + +ipv6_tcp_local() +{ + log_section "IPv6 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF tcp local connection IPv6" +} + +ipv6_udp_local() +{ + log_section "IPv6 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -D -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -D -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF udp local connection IPv6" +} + ################################################################################ # usage @@ -555,8 +636,10 @@ EOF # Some systems don't have a ping6 binary anymore command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) -TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" -TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym" +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local +ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local +ipv6_ping_ttl_asym ipv6_traceroute_asym" ret=0 nsuccess=0 @@ -594,12 +677,18 @@ do ipv4_traceroute|traceroute) ipv4_traceroute;;& ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& ipv4_ping_frag|ping) ipv4_ping_frag;;& + ipv4_ping_local|ping) ipv4_ping_local;;& + ipv4_tcp_local) ipv4_tcp_local;;& + ipv4_udp_local) ipv4_udp_local;;& ipv6_ping_ttl|ping) ipv6_ping_ttl;;& ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& ipv6_traceroute|traceroute) ipv6_traceroute;;& ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& ipv6_ping_frag|ping) ipv6_ping_frag;;& + ipv6_ping_local|ping) ipv6_ping_local;;& + ipv6_tcp_local) ipv6_tcp_local;;& + ipv6_udp_local) ipv6_udp_local;;& # setup namespaces and config, but do not run any tests setup_sym|setup) setup_sym; exit 0;; diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk new file mode 100644 index 000000000000..59cb26cf3f73 --- /dev/null +++ b/tools/testing/selftests/net/ynl.mk @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +# YNL selftest build snippet + +# Inputs: +# +# YNL_GENS: families we need in the selftests +# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_FILES: TEST_GEN_FILES which need YNL + +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) + +$(YNL_OUTPUTS): $(OUTPUT)/libynl.a +$(YNL_OUTPUTS): CFLAGS += \ + -I$(top_srcdir)/usr/include/ $(KHDR_INCLUDES) \ + -I$(top_srcdir)/tools/net/ynl/lib/ \ + -I$(top_srcdir)/tools/net/ynl/generated/ + +$(OUTPUT)/libynl.a: + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 40dd95228051..3fbabab46958 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -152,7 +152,7 @@ CFLAGS_mips32be = -EB -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ - $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) + $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA) LDFLAGS := REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 94bb6e11c16f..093d0512f4c5 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -64,6 +64,14 @@ static const char *argv0; /* will be used by constructor tests */ static int constructor_test_value; +static const int is_nolibc = +#ifdef NOLIBC + 1 +#else + 0 +#endif +; + /* definition of a series of tests */ struct test { const char *name; /* test name */ @@ -607,7 +615,7 @@ int expect_strne(const char *expr, int llen, const char *cmp) static __attribute__((unused)) int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const char *cmp) { - llen += printf(" = %lu <%s> ", expr, buf); + llen += printf(" = %lu <%s> ", (unsigned long)expr, buf); if (strcmp(buf, cmp) != 0) { result(llen, FAIL); return 1; @@ -621,6 +629,51 @@ int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const return 0; } +#define EXPECT_STRTOX(cond, func, input, base, expected, chars, expected_errno) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strtox(llen, func, input, base, expected, chars, expected_errno); } while (0) + +static __attribute__((unused)) +int expect_strtox(int llen, void *func, const char *input, int base, intmax_t expected, int expected_chars, int expected_errno) +{ + char *endptr; + int actual_errno, actual_chars; + intmax_t r; + + errno = 0; + if (func == strtol) { + r = strtol(input, &endptr, base); + } else if (func == strtoul) { + r = strtoul(input, &endptr, base); + } else { + result(llen, FAIL); + return 1; + } + actual_errno = errno; + actual_chars = endptr - input; + + llen += printf(" %lld = %lld", (long long)expected, (long long)r); + if (r != expected) { + result(llen, FAIL); + return 1; + } + if (expected_chars == -1) { + if (*endptr != '\0') { + result(llen, FAIL); + return 1; + } + } else if (expected_chars != actual_chars) { + result(llen, FAIL); + return 1; + } + if (actual_errno != expected_errno) { + result(llen, FAIL); + return 1; + } + + result(llen, OK); + return 0; +} + /* declare tests based on line numbers. There must be exactly one test per line. */ #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); @@ -942,6 +995,7 @@ int run_syscall(int min, int max) int ret = 0; void *p1, *p2; int has_gettid = 1; + int has_brk; /* <proc> indicates whether or not /proc is mounted */ proc = stat("/proc", &stat_buf) == 0; @@ -954,6 +1008,9 @@ int run_syscall(int min, int max) has_gettid = __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30); #endif + /* on musl setting brk()/sbrk() always fails */ + has_brk = brk(0) == 0; + for (test = min; test >= 0 && test <= max; test++) { int llen = 0; /* line length */ @@ -969,9 +1026,9 @@ int run_syscall(int min, int max) CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break; CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break; CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break; - CASE_TEST(sbrk_0); EXPECT_PTRNE(1, sbrk(0), (void *)-1); break; - CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break; - CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break; + CASE_TEST(sbrk_0); EXPECT_PTRNE(has_brk, sbrk(0), (void *)-1); break; + CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(has_brk, (p2 == (void *)-1) || p2 == p1); break; + CASE_TEST(brk); EXPECT_SYSZR(has_brk, brk(sbrk(0))); break; CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break; CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break; CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break; @@ -1076,19 +1133,17 @@ int run_stdlib(int min, int max) CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; -#ifdef NOLIBC - CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 0), buf, 3, "test"); break; - CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 1), buf, 4, "test"); break; - CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 5), buf, 7, "test"); break; - CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 6), buf, 7, "testb"); break; - CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 7), buf, 7, "testba"); break; - CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 8), buf, 7, "testbar"); break; - CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 0), buf, 3, "test"); break; - CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 1), buf, 3, ""); break; - CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 2), buf, 3, "b"); break; - CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; - CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; -#endif + CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 0), buf, 3, "test"); break; + CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 1), buf, 4, "test"); break; + CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 5), buf, 7, "test"); break; + CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 6), buf, 7, "testb"); break; + CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 7), buf, 7, "testba"); break; + CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 8), buf, 7, "testbar"); break; + CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 0), buf, 3, "test"); break; + CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 1), buf, 3, ""); break; + CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break; + CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; + CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; @@ -1139,6 +1194,26 @@ int run_stdlib(int min, int max) CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break; CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break; CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break; + CASE_TEST(strtol_simple); EXPECT_STRTOX(1, strtol, "35", 10, 35, -1, 0); break; + CASE_TEST(strtol_positive); EXPECT_STRTOX(1, strtol, "+35", 10, 35, -1, 0); break; + CASE_TEST(strtol_negative); EXPECT_STRTOX(1, strtol, "-35", 10, -35, -1, 0); break; + CASE_TEST(strtol_hex_auto); EXPECT_STRTOX(1, strtol, "0xFF", 0, 255, -1, 0); break; + CASE_TEST(strtol_base36); EXPECT_STRTOX(1, strtol, "12yZ", 36, 50507, -1, 0); break; + CASE_TEST(strtol_cutoff); EXPECT_STRTOX(1, strtol, "1234567890", 8, 342391, 7, 0); break; + CASE_TEST(strtol_octal_auto); EXPECT_STRTOX(1, strtol, "011", 0, 9, -1, 0); break; + CASE_TEST(strtol_hex_00); EXPECT_STRTOX(1, strtol, "0x00", 16, 0, -1, 0); break; + CASE_TEST(strtol_hex_FF); EXPECT_STRTOX(1, strtol, "FF", 16, 255, -1, 0); break; + CASE_TEST(strtol_hex_ff); EXPECT_STRTOX(1, strtol, "ff", 16, 255, -1, 0); break; + CASE_TEST(strtol_hex_prefix); EXPECT_STRTOX(1, strtol, "0xFF", 16, 255, -1, 0); break; + CASE_TEST(strtol_trailer); EXPECT_STRTOX(1, strtol, "35foo", 10, 35, 2, 0); break; + CASE_TEST(strtol_overflow); EXPECT_STRTOX(1, strtol, "0x8000000000000000", 16, LONG_MAX, -1, ERANGE); break; + CASE_TEST(strtol_underflow); EXPECT_STRTOX(1, strtol, "-0x8000000000000001", 16, LONG_MIN, -1, ERANGE); break; + CASE_TEST(strtoul_negative); EXPECT_STRTOX(1, strtoul, "-0x1", 16, ULONG_MAX, 4, 0); break; + CASE_TEST(strtoul_overflow); EXPECT_STRTOX(1, strtoul, "0x10000000000000000", 16, ULONG_MAX, -1, ERANGE); break; + CASE_TEST(strerror_success); EXPECT_STREQ(is_nolibc, strerror(0), "errno=0"); break; + CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break; + CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break; + CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break; case __LINE__: return ret; /* must be last */ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index c0a5a7cea9fa..0446e6326a40 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -15,9 +15,10 @@ download_location="${cache_dir}/crosstools/" build_location="$(realpath "${cache_dir}"/nolibc-tests/)" perform_download=0 test_mode=system +CFLAGS_EXTRA="-Werror" archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" -TEMP=$(getopt -o 'j:d:c:b:a:m:ph' -n "$0" -- "$@") +TEMP=$(getopt -o 'j:d:c:b:a:m:peh' -n "$0" -- "$@") eval set -- "$TEMP" unset TEMP @@ -40,6 +41,7 @@ Options: -a [ARCH] Host architecture of toolchains to use (default: ${hostarch}) -b [DIR] Build location (default: ${build_location}) -m [MODE] Test mode user/system (default: ${test_mode}) + -e Disable -Werror EOF } @@ -66,6 +68,9 @@ while true; do '-m') test_mode="$2" shift 2; continue ;; + '-e') + CFLAGS_EXTRA="" + shift; continue ;; '-h') print_usage exit 0 @@ -153,7 +158,7 @@ test_arch() { exit 1 esac printf '%-15s' "$arch:" - swallow_output "${MAKE[@]}" "$test_target" V=1 + swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1 cp run.out run.out."${arch}" "${MAKE[@]}" report | grep passed } diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 254d676a2689..185dc76ebb5f 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,8 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + +LOCAL_HDRS += helpers.h + include ../lib.mk -$(TEST_GEN_PROGS): helpers.c helpers.h +$(TEST_GEN_PROGS): helpers.c diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 47746b0c6acd..7c2a4349170a 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,11 +16,56 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> +#include <linux/ioctl.h> #include "pidfd.h" #include "../clone3/clone3_selftests.h" #include "../kselftest_harness.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_CGROUP_NAMESPACE +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#endif + +#ifndef PIDFD_GET_IPC_NAMESPACE +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#endif + +#ifndef PIDFD_GET_NET_NAMESPACE +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#endif + +#ifndef PIDFD_GET_PID_NAMESPACE +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#endif + +#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#endif + +#ifndef PIDFD_GET_TIME_NAMESPACE +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#endif + +#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#endif + +#ifndef PIDFD_GET_USER_NAMESPACE +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#endif + +#ifndef PIDFD_GET_UTS_NAMESPACE +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#endif + enum { PIDFD_NS_USER, PIDFD_NS_MNT, @@ -31,22 +76,25 @@ enum { PIDFD_NS_CGROUP, PIDFD_NS_PIDCLD, PIDFD_NS_TIME, + PIDFD_NS_TIMECLD, PIDFD_NS_MAX }; const struct ns_info { const char *name; int flag; + unsigned int pidfd_ioctl; } ns_info[] = { - [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, }, - [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, }, - [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, }, - [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, }, - [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, }, - [PIDFD_NS_NET] = { "net", CLONE_NEWNET, }, - [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, }, - [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, }, - [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, }, + [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, }, + [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, }, + [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, }, + [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, }, + [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, }, + [PIDFD_NS_NET] = { "net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, }, + [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, }, + [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, }, + [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE, }, + [PIDFD_NS_TIMECLD] = { "time_for_children", 0, PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE, }, }; FIXTURE(current_nsset) @@ -54,6 +102,7 @@ FIXTURE(current_nsset) pid_t pid; int pidfd; int nsfds[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds[PIDFD_NS_MAX]; pid_t child_pid_exited; int child_pidfd_exited; @@ -61,10 +110,12 @@ FIXTURE(current_nsset) pid_t child_pid1; int child_pidfd1; int child_nsfds1[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds1[PIDFD_NS_MAX]; pid_t child_pid2; int child_pidfd2; int child_nsfds2[PIDFD_NS_MAX]; + int child_pidfd_derived_nsfds2[PIDFD_NS_MAX]; }; static int sys_waitid(int which, pid_t pid, int options) @@ -128,9 +179,12 @@ FIXTURE_SETUP(current_nsset) char c; for (i = 0; i < PIDFD_NS_MAX; i++) { - self->nsfds[i] = -EBADF; - self->child_nsfds1[i] = -EBADF; - self->child_nsfds2[i] = -EBADF; + self->nsfds[i] = -EBADF; + self->child_nsfds1[i] = -EBADF; + self->child_nsfds2[i] = -EBADF; + self->child_pidfd_derived_nsfds[i] = -EBADF; + self->child_pidfd_derived_nsfds1[i] = -EBADF; + self->child_pidfd_derived_nsfds2[i] = -EBADF; } proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC); @@ -139,6 +193,11 @@ FIXTURE_SETUP(current_nsset) } self->pid = getpid(); + self->pidfd = sys_pidfd_open(self->pid, 0); + EXPECT_GT(self->pidfd, 0) { + TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + } + for (i = 0; i < PIDFD_NS_MAX; i++) { const struct ns_info *info = &ns_info[i]; self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC); @@ -148,20 +207,27 @@ FIXTURE_SETUP(current_nsset) info->name, self->pid); } } - } - self->pidfd = sys_pidfd_open(self->pid, 0); - EXPECT_GT(self->pidfd, 0) { - TH_LOG("%m - Failed to open pidfd for process %d", self->pid); + self->child_pidfd_derived_nsfds[i] = ioctl(self->pidfd, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->pid); + } + } } /* Create task that exits right away. */ - self->child_pid_exited = create_child(&self->child_pidfd_exited, - CLONE_NEWUSER | CLONE_NEWNET); + self->child_pid_exited = create_child(&self->child_pidfd_exited, 0); EXPECT_GE(self->child_pid_exited, 0); - if (self->child_pid_exited == 0) + if (self->child_pid_exited == 0) { + if (self->nsfds[PIDFD_NS_USER] >= 0 && unshare(CLONE_NEWUSER) < 0) + _exit(EXIT_FAILURE); + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) + _exit(EXIT_FAILURE); _exit(EXIT_SUCCESS); + } ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0); @@ -174,18 +240,43 @@ FIXTURE_SETUP(current_nsset) EXPECT_EQ(ret, 0); /* Create tasks that will be stopped. */ - self->child_pid1 = create_child(&self->child_pidfd1, - CLONE_NEWUSER | CLONE_NEWNS | - CLONE_NEWCGROUP | CLONE_NEWIPC | - CLONE_NEWUTS | CLONE_NEWPID | - CLONE_NEWNET); + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_USER] >= 0) + self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER); + else + self->child_pid1 = create_child(&self->child_pidfd1, 0); EXPECT_GE(self->child_pid1, 0); if (self->child_pid1 == 0) { close(ipc_sockets[0]); - if (!switch_timens()) + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } if (write_nointr(ipc_sockets[1], "1", 1) < 0) _exit(EXIT_FAILURE); @@ -203,18 +294,43 @@ FIXTURE_SETUP(current_nsset) ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); EXPECT_EQ(ret, 0); - self->child_pid2 = create_child(&self->child_pidfd2, - CLONE_NEWUSER | CLONE_NEWNS | - CLONE_NEWCGROUP | CLONE_NEWIPC | - CLONE_NEWUTS | CLONE_NEWPID | - CLONE_NEWNET); + if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_PID] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWPID); + else if (self->nsfds[PIDFD_NS_USER] >= 0) + self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER); + else + self->child_pid2 = create_child(&self->child_pidfd2, 0); EXPECT_GE(self->child_pid2, 0); if (self->child_pid2 == 0) { close(ipc_sockets[0]); - if (!switch_timens()) + if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) { + TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) { + TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid); _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) { + TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) { + TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) { + TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } + if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) { + TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid); + _exit(EXIT_FAILURE); + } if (write_nointr(ipc_sockets[1], "1", 1) < 0) _exit(EXIT_FAILURE); @@ -267,6 +383,22 @@ FIXTURE_SETUP(current_nsset) info->name, self->child_pid1); } } + + self->child_pidfd_derived_nsfds1[i] = ioctl(self->child_pidfd1, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds1[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->child_pid1); + } + } + + self->child_pidfd_derived_nsfds2[i] = ioctl(self->child_pidfd2, info->pidfd_ioctl, 0); + if (self->child_pidfd_derived_nsfds2[i] < 0) { + EXPECT_EQ(errno, EOPNOTSUPP) { + TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d", + info->name, self->child_pid2); + } + } } close(proc_fd); @@ -288,6 +420,12 @@ FIXTURE_TEARDOWN(current_nsset) close(self->child_nsfds1[i]); if (self->child_nsfds2[i] >= 0) close(self->child_nsfds2[i]); + if (self->child_pidfd_derived_nsfds[i] >= 0) + close(self->child_pidfd_derived_nsfds[i]); + if (self->child_pidfd_derived_nsfds1[i] >= 0) + close(self->child_pidfd_derived_nsfds1[i]); + if (self->child_pidfd_derived_nsfds2[i] >= 0) + close(self->child_pidfd_derived_nsfds2[i]); } if (self->child_pidfd1 >= 0) @@ -446,6 +584,42 @@ TEST_F(current_nsset, nsfd_incremental_setns) } } +TEST_F(current_nsset, pidfd_derived_nsfd_incremental_setns) +{ + int i; + pid_t pid; + + pid = getpid(); + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + int nsfd; + + if (self->child_pidfd_derived_nsfds1[i] < 0) + continue; + + if (info->flag) { + ASSERT_EQ(setns(self->child_pidfd_derived_nsfds1[i], info->flag), 0) { + TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_pidfd_derived_nsfds1[i]); + } + } + + /* Verify that we have changed to the correct namespaces. */ + if (info->flag == CLONE_NEWPID) + nsfd = self->child_pidfd_derived_nsfds[i]; + else + nsfd = self->child_pidfd_derived_nsfds1[i]; + ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) { + TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d", + info->name, self->child_pid1, + self->child_pidfd_derived_nsfds1[i]); + } + TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid1, self->child_pidfd_derived_nsfds1[i]); + } +} + TEST_F(current_nsset, pidfd_one_shot_setns) { unsigned flags = 0; @@ -542,6 +716,28 @@ TEST_F(current_nsset, no_foul_play) info->name, self->child_pid2, self->child_nsfds2[i]); } + + /* + * Can't setns to a user namespace outside of our hierarchy since we + * don't have caps in there and didn't create it. That means that under + * no circumstances should we be able to setns to any of the other + * ones since they aren't owned by our user namespace. + */ + for (i = 0; i < PIDFD_NS_MAX; i++) { + const struct ns_info *info = &ns_info[i]; + + if (self->child_pidfd_derived_nsfds2[i] < 0 || !info->flag) + continue; + + ASSERT_NE(setns(self->child_pidfd_derived_nsfds2[i], info->flag), 0) { + TH_LOG("Managed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_pidfd_derived_nsfds2[i]); + } + TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d", + info->name, self->child_pid2, + self->child_pidfd_derived_nsfds2[i]); + } } TEST(setns_einval) diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk index b909bee3cb2a..abb9e58d95c4 100644 --- a/tools/testing/selftests/powerpc/flags.mk +++ b/tools/testing/selftests/powerpc/flags.mk @@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") export GIT_VERSION endif -ifeq ($(CFLAGS),) -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) export CFLAGS -endif - diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index a156ac5dd2c6..973968f45bba 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -2,6 +2,7 @@ /fd-001-lookup /fd-002-posix-eq /fd-003-kthread +/proc-2-is-kthread /proc-fsconfig-hidepid /proc-loadavg-001 /proc-multiple-procfs @@ -9,6 +10,7 @@ /proc-pid-vm /proc-self-map-files-001 /proc-self-map-files-002 +/proc-self-isnt-kthread /proc-self-syscall /proc-self-wchan /proc-subset-pid diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index cd95369254c0..b12921b9794b 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -1,17 +1,19 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function -CFLAGS += -D_GNU_SOURCE +CFLAGS += $(TOOLS_INCLUDES) LDFLAGS += -pthread TEST_GEN_PROGS := TEST_GEN_PROGS += fd-001-lookup TEST_GEN_PROGS += fd-002-posix-eq TEST_GEN_PROGS += fd-003-kthread +TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-isnt-kthread TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan TEST_GEN_PROGS += proc-subset-pid diff --git a/tools/testing/selftests/proc/proc-2-is-kthread.c b/tools/testing/selftests/proc/proc-2-is-kthread.c new file mode 100644 index 000000000000..f13668fb482e --- /dev/null +++ b/tools/testing/selftests/proc/proc-2-is-kthread.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that kernel thread is reported as such. */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +int main(void) +{ + /* + * The following solutions don't really work: + * + * 1) jit kernel module which creates kernel thread: + * test becomes arch-specific, + * problems with mandatory module signing, + * problems with lockdown mode, + * doesn't work with CONFIG_MODULES=n at all, + * kthread creation API is formally unstable internal kernel API, + * need a mechanism to report test kernel thread's PID back, + * + * 2) ksoftirqd/0 and kswapd0 look like stable enough kernel threads, + * but their PIDs are unstable. + * + * Check against kthreadd which always seem to exist under pid 2. + */ + int fd = open("/proc/2/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + assert(strstr(buf, "Kthread:\t1\n")); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index 56198d4ca2bf..b3f898aab4ab 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -381,9 +381,6 @@ static int test_proc_pid_statm(pid_t pid) assert(rv >= 0); assert(rv <= sizeof(buf)); - if (0) { - write(1, buf, rv); - } const char *p = buf; const char *const end = p + rv; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index cacbd2a4aec9..d04685771952 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -45,6 +45,7 @@ #include <linux/kdev_t.h> #include <sys/time.h> #include <sys/resource.h> +#include <linux/fs.h> #include "../kselftest.h" @@ -492,6 +493,91 @@ int main(void) assert(buf[13] == '\n'); } + /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */ + { + char path_buf[256], exp_path_buf[256]; + struct procmap_query q; + int fd, err; + + snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid); + fd = open(path_buf, O_RDONLY); + if (fd == -1) + return 1; + + /* CASE 1: exact MATCH at VADDR */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR; + q.query_flags = 0; + q.vma_name_addr = (__u64)(unsigned long)path_buf; + q.vma_name_size = sizeof(path_buf); + + err = ioctl(fd, PROCMAP_QUERY, &q); + assert(err == 0); + + assert(q.query_addr == VADDR); + assert(q.query_flags == 0); + + assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE)); + assert(q.vma_start == VADDR); + assert(q.vma_end == VADDR + PAGE_SIZE); + assert(q.vma_page_size == PAGE_SIZE); + + assert(q.vma_offset == 0); + assert(q.inode == st.st_ino); + assert(q.dev_major == MAJOR(st.st_dev)); + assert(q.dev_minor == MINOR(st.st_dev)); + + snprintf(exp_path_buf, sizeof(exp_path_buf), + "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino); + assert(q.vma_name_size == strlen(exp_path_buf) + 1); + assert(strcmp(path_buf, exp_path_buf) == 0); + + /* CASE 2: NO MATCH at VADDR-1 */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR - 1; + q.query_flags = 0; /* exact match */ + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + + /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR - 1; + q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; + + err = ioctl(fd, PROCMAP_QUERY, &q); + assert(err == 0); + + assert(q.query_addr == VADDR - 1); + assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA); + assert(q.vma_start == VADDR); + assert(q.vma_end == VADDR + PAGE_SIZE); + + /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */ + q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA; + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + + /* CASE 5: NO MATCH WRITABLE at VADDR */ + memset(&q, 0, sizeof(q)); + q.size = sizeof(q); + q.query_addr = VADDR; + q.query_flags = PROCMAP_QUERY_VMA_WRITABLE; + + err = ioctl(fd, PROCMAP_QUERY, &q); + err = err < 0 ? -errno : 0; + assert(err == -ENOENT); + } + return 0; } #else diff --git a/tools/testing/selftests/proc/proc-self-isnt-kthread.c b/tools/testing/selftests/proc/proc-self-isnt-kthread.c new file mode 100644 index 000000000000..e01f4e0a91b4 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-isnt-kthread.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that userspace program is not kernel thread. */ +#undef NDEBUG +#include <assert.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +int main(void) +{ + int fd = open("/proc/self/status", O_RDONLY); + assert(fd >= 0); + + char buf[4096]; + ssize_t rv = read(fd, buf, sizeof(buf)); + assert(0 <= rv && rv < sizeof(buf)); + buf[rv] = '\0'; + + /* This test is very much not kernel thread. */ + assert(strstr(buf, "Kthread:\t0\n")); + + return 0; +} diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 021863f86053..f408bd6bfc3d 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := resctrl_tests diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index 1b339d6bbff1..1ff1104e6575 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -101,12 +101,12 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy) * * Return: 0 on success, < 0 on error. */ -static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value) +static int print_results_cache(const char *filename, pid_t bm_pid, __u64 llc_value) { FILE *fp; if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { - printf("Pid: %d \t LLC_value: %llu\n", bm_pid, llc_value); + printf("Pid: %d \t LLC_value: %llu\n", (int)bm_pid, llc_value); } else { fp = fopen(filename, "a"); if (!fp) { @@ -114,7 +114,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value return -1; } - fprintf(fp, "Pid: %d \t llc_value: %llu\n", bm_pid, llc_value); + fprintf(fp, "Pid: %d \t llc_value: %llu\n", (int)bm_pid, llc_value); fclose(fp); } @@ -133,7 +133,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value * Return: =0 on success. <0 on failure. */ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, - const char *filename, int bm_pid) + const char *filename, pid_t bm_pid) { int ret; @@ -161,7 +161,7 @@ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, * * Return: =0 on success. <0 on failure. */ -int measure_llc_resctrl(const char *filename, int bm_pid) +int measure_llc_resctrl(const char *filename, pid_t bm_pid) { unsigned long llc_occu_resc = 0; int ret; diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index c7686fb6641a..742782438ca3 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -158,7 +158,6 @@ static int cat_test(const struct resctrl_test *test, struct resctrl_val_param *param, size_t span, unsigned long current_mask) { - char *resctrl_val = param->resctrl_val; struct perf_event_read pe_read; struct perf_event_attr pea; cpu_set_t old_affinity; @@ -178,8 +177,7 @@ static int cat_test(const struct resctrl_test *test, return ret; /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, - resctrl_val); + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); if (ret) goto reset_affinity; @@ -272,7 +270,6 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param start_mask = create_bit_mask(start, n); struct resctrl_val_param param = { - .resctrl_val = CAT_STR, .ctrlgrp = "c1", .filename = RESULT_FILE_NAME, .num_of_runs = 0, @@ -291,11 +288,30 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param return ret; } +static bool arch_supports_noncont_cat(const struct resctrl_test *test) +{ + unsigned int eax, ebx, ecx, edx; + + /* AMD always supports non-contiguous CBM. */ + if (get_vendor() == ARCH_AMD) + return true; + + /* Intel support for non-contiguous CBM needs to be discovered. */ + if (!strcmp(test->resource, "L3")) + __cpuid_count(0x10, 1, eax, ebx, ecx, edx); + else if (!strcmp(test->resource, "L2")) + __cpuid_count(0x10, 2, eax, ebx, ecx, edx); + else + return false; + + return ((ecx >> 3) & 1); +} + static int noncont_cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) { unsigned long full_cache_mask, cont_mask, noncont_mask; - unsigned int eax, ebx, ecx, edx, sparse_masks; + unsigned int sparse_masks; int bit_center, ret; char schemata[64]; @@ -304,15 +320,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test, if (ret) return ret; - if (!strcmp(test->resource, "L3")) - __cpuid_count(0x10, 1, eax, ebx, ecx, edx); - else if (!strcmp(test->resource, "L2")) - __cpuid_count(0x10, 2, eax, ebx, ecx, edx); - else - return -EINVAL; - - if (sparse_masks != ((ecx >> 3) & 1)) { - ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n"); + if (arch_supports_noncont_cat(test) != sparse_masks) { + ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); return 1; } diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index 0105afec6188..0c045080d808 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -16,6 +16,17 @@ #define MAX_DIFF 2000000 #define MAX_DIFF_PERCENT 15 +#define CON_MON_LCC_OCCUP_PATH \ + "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" + +static int cmt_init(const struct resctrl_val_param *param, int domain_id) +{ + sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); + + return 0; +} + static int cmt_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) @@ -29,6 +40,13 @@ static int cmt_setup(const struct resctrl_test *test, return 0; } +static int cmt_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + sleep(1); + return measure_llc_resctrl(param->filename, bm_pid); +} + static int show_results_info(unsigned long sum_llc_val, int no_of_bits, unsigned long cache_span, unsigned long max_diff, unsigned long max_diff_percent, unsigned long num_of_runs, @@ -126,13 +144,13 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param } struct resctrl_val_param param = { - .resctrl_val = CMT_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, .mask = ~(long_mask << n) & long_mask, .num_of_runs = 0, + .init = cmt_init, .setup = cmt_setup, + .measure = cmt_measure, }; span = cache_portion_size(cache_total_size, param.mask, long_mask); diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index a6ad39aae162..ab8496a4925b 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -17,6 +17,19 @@ #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 +static int mba_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + /* * Change schemata percentage from 100 to 10%. Write schemata to specified * con_mon grp, mon_grp in resctrl FS. @@ -51,6 +64,12 @@ static int mba_setup(const struct resctrl_test *test, return 0; } +static int mba_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_mem_bw(uparams, param, bm_pid, "reads"); +} + static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) { int allocation, runs; @@ -145,12 +164,11 @@ static void mba_test_cleanup(void) static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams) { struct resctrl_val_param param = { - .resctrl_val = MBA_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, - .bw_report = "reads", - .setup = mba_setup + .init = mba_init, + .setup = mba_setup, + .measure = mba_measure, }; int ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 6fec51e1ff46..6b5a3b52d861 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -86,6 +86,19 @@ static int check_results(size_t span) return ret; } +static int mbm_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + static int mbm_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) @@ -105,6 +118,12 @@ static int mbm_setup(const struct resctrl_test *test, return ret; } +static int mbm_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_mem_bw(uparams, param, bm_pid, "reads"); +} + static void mbm_test_cleanup(void) { remove(RESULT_FILE_NAME); @@ -113,12 +132,11 @@ static void mbm_test_cleanup(void) static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams) { struct resctrl_val_param param = { - .resctrl_val = MBM_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, - .bw_report = "reads", - .setup = mbm_setup + .init = mbm_init, + .setup = mbm_setup, + .measure = mbm_measure, }; int ret; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 00d51fa7531c..2dda56084588 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -43,13 +43,6 @@ #define DEFAULT_SPAN (250 * MB) -#define PARENT_EXIT() \ - do { \ - kill(ppid, SIGKILL); \ - umount_resctrlfs(); \ - exit(EXIT_FAILURE); \ - } while (0) - /* * user_params: User supplied parameters * @cpu: CPU number to which the benchmark will be bound to @@ -88,24 +81,27 @@ struct resctrl_test { /* * resctrl_val_param: resctrl test parameters - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @filename: Name of file to which the o/p should be written - * @bw_report: Bandwidth report type (reads vs writes) - * @setup: Call back function to setup test environment + * @init: Callback function to initialize test environment + * @setup: Callback function to setup per test run environment + * @measure: Callback that performs the measurement (a single test) */ struct resctrl_val_param { - char *resctrl_val; - char ctrlgrp[64]; - char mongrp[64]; + const char *ctrlgrp; + const char *mongrp; char filename[64]; - char *bw_report; unsigned long mask; int num_of_runs; + int (*init)(const struct resctrl_val_param *param, + int domain_id); int (*setup)(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *param); + int (*measure)(const struct user_params *uparams, + struct resctrl_val_param *param, + pid_t bm_pid); }; struct perf_event_read { @@ -115,11 +111,6 @@ struct perf_event_read { } values[2]; }; -#define MBM_STR "mbm" -#define MBA_STR "mba" -#define CMT_STR "cmt" -#define CAT_STR "cat" - /* * Memory location that consumes values compiler must not optimize away. * Volatile ensures writes to this location cannot be optimized away by @@ -127,8 +118,6 @@ struct perf_event_read { */ extern volatile int *value_sink; -extern pid_t bm_pid, ppid; - extern char llc_occup_path[1024]; int get_vendor(void); @@ -137,7 +126,7 @@ int filter_dmesg(void); int get_domain_id(const char *resource, int cpu_no, int *domain_id); int mount_resctrlfs(void); int umount_resctrlfs(void); -int validate_bw_report_request(char *bw_report); +const char *get_bw_report_type(const char *bw_report); bool resctrl_resource_exists(const char *resource); bool resctrl_mon_feature_exists(const char *resource, const char *feature); bool resource_info_file_exists(const char *resource, const char *file); @@ -145,15 +134,21 @@ bool test_resource_feature_check(const struct resctrl_test *test); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity); int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity); -int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource); -int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, - char *resctrl_val); +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource); +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp); int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); unsigned char *alloc_buffer(size_t buf_size, int memflush); void mem_flush(unsigned char *buf, size_t buf_size); void fill_cache_read(unsigned char *buf, size_t buf_size, bool once); int run_fill_buf(size_t buf_size, int memflush, int op, bool once); +int initialize_mem_bw_imc(void); +int measure_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid, + const char *bw_report); +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id); int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, const char * const *benchmark_cmd, @@ -174,8 +169,8 @@ void perf_event_initialize_read_format(struct perf_event_read *pe_read); int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no); int perf_event_reset_enable(int pe_fd); int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, - const char *filename, int bm_pid); -int measure_llc_resctrl(const char *filename, int bm_pid); + const char *filename, pid_t bm_pid); +int measure_llc_resctrl(const char *filename, pid_t bm_pid); void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines); /* diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 445f306d4c2f..8c275f6b4dd7 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -19,30 +19,10 @@ #define MAX_TOKENS 5 #define READ 0 #define WRITE 1 -#define CON_MON_MBM_LOCAL_BYTES_PATH \ - "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" #define CON_MBM_LOCAL_BYTES_PATH \ "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" -#define MON_MBM_LOCAL_BYTES_PATH \ - "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" - -#define MBM_LOCAL_BYTES_PATH \ - "%s/mon_data/mon_L3_%02d/mbm_local_bytes" - -#define CON_MON_LCC_OCCUP_PATH \ - "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define CON_LCC_OCCUP_PATH \ - "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define MON_LCC_OCCUP_PATH \ - "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define LCC_OCCUP_PATH \ - "%s/mon_data/mon_L3_%02d/llc_occupancy" - struct membw_read_format { __u64 value; /* The value of the event */ __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ @@ -276,7 +256,7 @@ static int num_of_imcs(void) return count; } -static int initialize_mem_bw_imc(void) +int initialize_mem_bw_imc(void) { int imc, j; @@ -293,44 +273,93 @@ static int initialize_mem_bw_imc(void) return 0; } +static void perf_close_imc_mem_bw(void) +{ + int mc; + + for (mc = 0; mc < imcs; mc++) { + if (imc_counters_config[mc][READ].fd != -1) + close(imc_counters_config[mc][READ].fd); + if (imc_counters_config[mc][WRITE].fd != -1) + close(imc_counters_config[mc][WRITE].fd); + } +} + /* - * get_mem_bw_imc: Memory band width as reported by iMC counters - * @cpu_no: CPU number that the benchmark PID is binded to - * @bw_report: Bandwidth report type (reads, writes) - * - * Memory B/W utilized by a process on a socket can be calculated using - * iMC counters. Perf events are used to read these counters. + * perf_open_imc_mem_bw - Open perf fds for IMCs + * @cpu_no: CPU number that the benchmark PID is bound to * * Return: = 0 on success. < 0 on failure. */ -static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) +static int perf_open_imc_mem_bw(int cpu_no) { - float reads, writes, of_mul_read, of_mul_write; - int imc, j, ret; + int imc, ret; - /* Start all iMC counters to log values (both read and write) */ - reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) { - ret = open_perf_event(imc, cpu_no, j); - if (ret) - return -1; - } - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_reset_enable(imc, j); + imc_counters_config[imc][READ].fd = -1; + imc_counters_config[imc][WRITE].fd = -1; + } + + for (imc = 0; imc < imcs; imc++) { + ret = open_perf_event(imc, cpu_no, READ); + if (ret) + goto close_fds; + ret = open_perf_event(imc, cpu_no, WRITE); + if (ret) + goto close_fds; + } + + return 0; + +close_fds: + perf_close_imc_mem_bw(); + return -1; +} + +/* + * do_mem_bw_test - Perform memory bandwidth test + * + * Runs memory bandwidth test over one second period. Also, handles starting + * and stopping of the IMC perf counters around the test. + */ +static void do_imc_mem_bw_test(void) +{ + int imc; + + for (imc = 0; imc < imcs; imc++) { + membw_ioctl_perf_event_ioc_reset_enable(imc, READ); + membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE); } sleep(1); /* Stop counters after a second to get results (both read and write) */ for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_disable(imc, j); + membw_ioctl_perf_event_ioc_disable(imc, READ); + membw_ioctl_perf_event_ioc_disable(imc, WRITE); } +} + +/* + * get_mem_bw_imc - Memory bandwidth as reported by iMC counters + * @bw_report: Bandwidth report type (reads, writes) + * + * Memory bandwidth utilized by a process on a socket can be calculated + * using iMC counters. Perf events are used to read these counters. + * + * Return: = 0 on success. < 0 on failure. + */ +static int get_mem_bw_imc(const char *bw_report, float *bw_imc) +{ + float reads, writes, of_mul_read, of_mul_write; + int imc; + + /* Start all iMC counters to log values (both read and write) */ + reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; /* * Get results which are stored in struct type imc_counter_config - * Take over flow into consideration before calculating total b/w + * Take overflow into consideration before calculating total bandwidth. */ for (imc = 0; imc < imcs; imc++) { struct imc_counter_config *r = @@ -340,15 +369,13 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) if (read(r->fd, &r->return_value, sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get read b/w through iMC"); - + ksft_perror("Couldn't get read bandwidth through iMC"); return -1; } if (read(w->fd, &w->return_value, sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get write bw through iMC"); - + ksft_perror("Couldn't get write bandwidth through iMC"); return -1; } @@ -369,11 +396,6 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) writes += w->return_value.value * of_mul_write * SCALE; } - for (imc = 0; imc < imcs; imc++) { - close(imc_counters_config[imc][READ].fd); - close(imc_counters_config[imc][WRITE].fd); - } - if (strcmp(bw_report, "reads") == 0) { *bw_imc = reads; return 0; @@ -388,84 +410,45 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) return 0; } -void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id) +/* + * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" + * @param: Parameters passed to resctrl_val() + * @domain_id: Domain ID (cache ID; for MB, L3 cache ID) + */ +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id) { - if (ctrlgrp && mongrp) - sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, ctrlgrp, mongrp, domain_id); - else if (!ctrlgrp && mongrp) - sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - mongrp, domain_id); - else if (ctrlgrp && !mongrp) - sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - ctrlgrp, domain_id); - else if (!ctrlgrp && !mongrp) - sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - domain_id); + sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); } /* - * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" - * @ctrlgrp: Name of the control monitor group (con_mon grp) - * @mongrp: Name of the monitor group (mon grp) - * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + * Open file to read MBM local bytes from resctrl FS */ -static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, - int cpu_no, char *resctrl_val) +static FILE *open_mem_bw_resctrl(const char *mbm_bw_file) { - int domain_id; - - if (get_domain_id("MB", cpu_no, &domain_id) < 0) { - ksft_print_msg("Could not get domain ID\n"); - return; - } + FILE *fp; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) - set_mbm_path(ctrlgrp, mongrp, domain_id); + fp = fopen(mbm_bw_file, "r"); + if (!fp) + ksft_perror("Failed to open total memory bandwidth file"); - if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - if (ctrlgrp) - sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, ctrlgrp, domain_id); - else - sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, domain_id); - } + return fp; } /* * Get MBM Local bytes as reported by resctrl FS - * For MBM, - * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp - * 2. If only con_mon grp is given, then read from con_mon grp - * 3. If both are not given, then read from root con_mon grp - * For MBA, - * 1. If con_mon grp is given, then read from it - * 2. If con_mon grp is not given, then read from root con_mon grp */ -static int get_mem_bw_resctrl(unsigned long *mbm_total) +static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total) { - FILE *fp; - - fp = fopen(mbm_total_path, "r"); - if (!fp) { - ksft_perror("Failed to open total bw file"); - + if (fscanf(fp, "%lu\n", mbm_total) <= 0) { + ksft_perror("Could not get MBM local bytes"); return -1; } - if (fscanf(fp, "%lu", mbm_total) <= 0) { - ksft_perror("Could not get mbm local bytes"); - fclose(fp); - - return -1; - } - fclose(fp); - return 0; } -pid_t bm_pid, ppid; +static pid_t bm_pid, ppid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { @@ -523,6 +506,13 @@ void signal_handler_unregister(void) } } +static void parent_exit(pid_t ppid) +{ + kill(ppid, SIGKILL); + umount_resctrlfs(); + exit(EXIT_FAILURE); +} + /* * print_results_bw: the memory bandwidth results are stored in a file * @filename: file that stores the results @@ -532,14 +522,14 @@ void signal_handler_unregister(void) * * Return: 0 on success, < 0 on error. */ -static int print_results_bw(char *filename, int bm_pid, float bw_imc, +static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, unsigned long bw_resc) { unsigned long diff = fabs(bw_imc - bw_resc); FILE *fp; if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { - printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc); + printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc); printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff); } else { fp = fopen(filename, "a"); @@ -549,7 +539,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return -1; } if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n", - bm_pid, bw_imc, bw_resc, diff) <= 0) { + (int)bm_pid, bw_imc, bw_resc, diff) <= 0) { ksft_print_msg("Could not log results\n"); fclose(fp); @@ -561,73 +551,67 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return 0; } -static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num) -{ - if (strlen(ctrlgrp) && strlen(mongrp)) - sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, - ctrlgrp, mongrp, sock_num); - else if (!strlen(ctrlgrp) && strlen(mongrp)) - sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH, - mongrp, sock_num); - else if (strlen(ctrlgrp) && !strlen(mongrp)) - sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH, - ctrlgrp, sock_num); - else if (!strlen(ctrlgrp) && !strlen(mongrp)) - sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num); -} - /* - * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path" - * @ctrlgrp: Name of the control monitor group (con_mon grp) - * @mongrp: Name of the monitor group (mon grp) - * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc) + * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs + * @uparams: User supplied parameters + * @param: Parameters passed to resctrl_val() + * @bm_pid: PID that runs the benchmark + * @bw_report: Bandwidth report type (reads, writes) + * + * Measure memory bandwidth from resctrl and from another source which is + * perf imc value or could be something else if perf imc event is not + * available. Compare the two values to validate resctrl value. It takes + * 1 sec to measure the data. */ -static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, - int cpu_no, char *resctrl_val) +int measure_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid, + const char *bw_report) { - int domain_id; + unsigned long bw_resc, bw_resc_start, bw_resc_end; + FILE *mem_bw_fp; + float bw_imc; + int ret; - if (get_domain_id("L3", cpu_no, &domain_id) < 0) { - ksft_print_msg("Could not get domain ID\n"); - return; - } + bw_report = get_bw_report_type(bw_report); + if (!bw_report) + return -1; - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - set_cmt_path(ctrlgrp, mongrp, domain_id); -} + mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); + if (!mem_bw_fp) + return -1; -static int measure_vals(const struct user_params *uparams, - struct resctrl_val_param *param, - unsigned long *bw_resc_start) -{ - unsigned long bw_resc, bw_resc_end; - float bw_imc; - int ret; + ret = perf_open_imc_mem_bw(uparams->cpu); + if (ret < 0) + goto close_fp; - /* - * Measure memory bandwidth from resctrl and from - * another source which is perf imc value or could - * be something else if perf imc event is not available. - * Compare the two values to validate resctrl value. - * It takes 1sec to measure the data. - */ - ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc); + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start); if (ret < 0) - return ret; + goto close_imc; + + rewind(mem_bw_fp); + + do_imc_mem_bw_test(); - ret = get_mem_bw_resctrl(&bw_resc_end); + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); if (ret < 0) - return ret; + goto close_imc; - bw_resc = (bw_resc_end - *bw_resc_start) / MB; - ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); - if (ret) - return ret; + ret = get_mem_bw_imc(bw_report, &bw_imc); + if (ret < 0) + goto close_imc; - *bw_resc_start = bw_resc_end; + perf_close_imc_mem_bw(); + fclose(mem_bw_fp); - return 0; + bw_resc = (bw_resc_end - bw_resc_start) / MB; + + return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); + +close_imc: + perf_close_imc_mem_bw(); +close_fp: + fclose(mem_bw_fp); + return ret; } /* @@ -654,7 +638,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) fp = freopen("/dev/null", "w", stdout); if (!fp) { ksft_perror("Unable to direct benchmark status to /dev/null"); - PARENT_EXIT(); + parent_exit(ppid); } if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { @@ -668,7 +652,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) once = false; } else { ksft_print_msg("Invalid once parameter\n"); - PARENT_EXIT(); + parent_exit(ppid); } if (run_fill_buf(span, memflush, operation, once)) @@ -682,7 +666,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) fclose(stdout); ksft_print_msg("Unable to run specified benchmark\n"); - PARENT_EXIT(); + parent_exit(ppid); } /* @@ -700,21 +684,19 @@ int resctrl_val(const struct resctrl_test *test, const char * const *benchmark_cmd, struct resctrl_val_param *param) { - char *resctrl_val = param->resctrl_val; - unsigned long bw_resc_start = 0; struct sigaction sigact; int ret = 0, pipefd[2]; char pipe_message = 0; union sigval value; + int domain_id; if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); - if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || - !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - ret = validate_bw_report_request(param->bw_report); - if (ret) - return ret; + ret = get_domain_id(test->resource, uparams->cpu, &domain_id); + if (ret < 0) { + ksft_print_msg("Could not get domain ID\n"); + return ret; } /* @@ -755,7 +737,7 @@ int resctrl_val(const struct resctrl_test *test, /* Register for "SIGUSR1" signal from parent */ if (sigaction(SIGUSR1, &sigact, NULL)) { ksft_perror("Can't register child for signal"); - PARENT_EXIT(); + parent_exit(ppid); } /* Tell parent that child is ready */ @@ -773,10 +755,10 @@ int resctrl_val(const struct resctrl_test *test, sigsuspend(&sigact.sa_mask); ksft_perror("Child is done"); - PARENT_EXIT(); + parent_exit(ppid); } - ksft_print_msg("Benchmark PID: %d\n", bm_pid); + ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); /* * The cast removes constness but nothing mutates benchmark_cmd within @@ -792,22 +774,15 @@ int resctrl_val(const struct resctrl_test *test, goto out; /* Write benchmark to specified control&monitoring grp in resctrl FS */ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, - resctrl_val); + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); if (ret) goto out; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - ret = initialize_mem_bw_imc(); + if (param->init) { + ret = param->init(param, domain_id); if (ret) goto out; - - initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, - uparams->cpu, resctrl_val); - } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp, - uparams->cpu, resctrl_val); + } /* Parent waits for child to be ready. */ close(pipefd[1]); @@ -841,17 +816,9 @@ int resctrl_val(const struct resctrl_test *test, if (ret < 0) break; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - ret = measure_vals(uparams, param, &bw_resc_start); - if (ret) - break; - } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - sleep(1); - ret = measure_llc_resctrl(param->filename, bm_pid); - if (ret) - break; - } + ret = param->measure(uparams, param, bm_pid); + if (ret) + break; } out: diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 1cade75176eb..250c320349a7 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -456,6 +456,9 @@ int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity) * @grp: Full path and name of the group * @parent_grp: Full path and name of the parent group * + * Creates a group @grp_name if it does not exist yet. If @grp_name is NULL, + * it is interpreted as the root group which always results in success. + * * Return: 0 on success, < 0 on error. */ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) @@ -464,12 +467,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) struct dirent *ep; DIR *dp; - /* - * At this point, we are guaranteed to have resctrl FS mounted and if - * length of grp_name == 0, it means, user wants to use root con_mon - * grp, so do nothing - */ - if (strlen(grp_name) == 0) + if (!grp_name) return 0; /* Check if requested grp exists or not */ @@ -508,7 +506,7 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) return -1; } - if (fprintf(fp, "%d\n", pid) < 0) { + if (fprintf(fp, "%d\n", (int)pid) < 0) { ksft_print_msg("Failed to write pid to tasks file\n"); fclose(fp); @@ -524,7 +522,6 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) * @bm_pid: PID that should be written * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) * * If a con_mon grp is requested, create it and write pid to it, otherwise * write pid to root con_mon grp. @@ -534,14 +531,13 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) * * Return: 0 on success, < 0 on error. */ -int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, - char *resctrl_val) +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp) { char controlgroup[128], monitorgroup[512], monitorgroup_p[256]; char tasks[1024]; int ret = 0; - if (strlen(ctrlgrp)) + if (ctrlgrp) sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp); else sprintf(controlgroup, "%s", RESCTRL_PATH); @@ -555,22 +551,19 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, if (ret) goto out; - /* Create mon grp and write pid into it for "mbm" and "cmt" test */ - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) || - !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - if (strlen(mongrp)) { - sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); - sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); - ret = create_grp(mongrp, monitorgroup, monitorgroup_p); - if (ret) - goto out; - - sprintf(tasks, "%s/mon_groups/%s/tasks", - controlgroup, mongrp); - ret = write_pid_to_tasks(tasks, bm_pid); - if (ret) - goto out; - } + /* Create monitor group and write pid into if it is used */ + if (mongrp) { + sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); + sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); + ret = create_grp(mongrp, monitorgroup, monitorgroup_p); + if (ret) + goto out; + + sprintf(tasks, "%s/mon_groups/%s/tasks", + controlgroup, mongrp); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; } out: @@ -593,7 +586,8 @@ out: * * Return: 0 on success, < 0 on error. */ -int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource) +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource) { char controlgroup[1024], reason[128], schema[1024] = {}; int domain_id, fd, schema_len, ret = 0; @@ -611,7 +605,7 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resour goto out; } - if (strlen(ctrlgrp) != 0) + if (ctrlgrp) sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp); else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); @@ -837,22 +831,21 @@ int filter_dmesg(void) return 0; } -int validate_bw_report_request(char *bw_report) +const char *get_bw_report_type(const char *bw_report) { if (strcmp(bw_report, "reads") == 0) - return 0; + return bw_report; if (strcmp(bw_report, "writes") == 0) - return 0; + return bw_report; if (strcmp(bw_report, "nt-writes") == 0) { - strcpy(bw_report, "writes"); - return 0; + return "writes"; } if (strcmp(bw_report, "total") == 0) - return 0; + return bw_report; - fprintf(stderr, "Requested iMC B/W report type unavailable\n"); + fprintf(stderr, "Requested iMC bandwidth report type unavailable\n"); - return -1; + return NULL; } int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, diff --git a/tools/testing/selftests/ring-buffer/Makefile b/tools/testing/selftests/ring-buffer/Makefile index 627c5fa6d1ab..23605782639e 100644 --- a/tools/testing/selftests/ring-buffer/Makefile +++ b/tools/testing/selftests/ring-buffer/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -Wl,-no-as-needed -Wall CFLAGS += $(KHDR_INCLUDES) -CFLAGS += -D_GNU_SOURCE TEST_GEN_PROGS = map_test diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile index c333263f2b27..4664ed79e20b 100644 --- a/tools/testing/selftests/riscv/mm/Makefile +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -3,7 +3,7 @@ # Originally tools/testing/arm64/abi/Makefile # Additional include paths needed by kselftest.h and local headers -CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. TEST_GEN_FILES := mmap_default mmap_bottomup diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c index 62397d5934f1..ed351a1cb917 100644 --- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c +++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c @@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *)) asm(".option push \n\ .option arch, +v \n\ - vsetivli x0, 1, e32, ta, ma \n\ + vsetivli x0, 1, e32, m1, ta, ma \n\ vmv.s.x v0, %1 \n\ # Generate SIGSEGV \n\ lw a0, 0(x0) \n\ diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 27668fb3b6d0..895177f6bf4c 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -88,16 +88,16 @@ int main(void) return -2; } - if (!(pair.value & RISCV_HWPROBE_IMA_V)) { + if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) { rc = prctl(PR_RISCV_V_GET_CONTROL); if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n"); return -3; } rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); if (rc != -1 || errno != EINVAL) { - ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n"); return -4; } diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c index 62fba7356af2..52d97fae4dbd 100644 --- a/tools/testing/selftests/sched/cs_prctl_test.c +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -42,11 +42,11 @@ static pid_t gettid(void) #ifndef PR_SCHED_CORE #define PR_SCHED_CORE 62 -# define PR_SCHED_CORE_GET 0 -# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ -# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ -# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ -# define PR_SCHED_CORE_MAX 4 +#define PR_SCHED_CORE_GET 0 +#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +#define PR_SCHED_CORE_MAX 4 #endif #define MAX_PROCESSES 128 diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index b83099160fbc..94886c82ae60 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -194,14 +194,14 @@ int main(int argc, char *argv[]) ksft_set_plan(7); ksft_print_msg("Running on:\n"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("uname -a"); ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); affinity(); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 783ebce8c4de..8c3a73461475 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -60,7 +60,9 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 @@ -3954,6 +3956,60 @@ TEST(user_notification_filter_empty) EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); } +TEST(user_ioctl_notification_filter_empty) +{ + pid_t pid; + long ret; + int status, p[2]; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + struct seccomp_notif req = {}; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + if (__NR_clone3 < 0) + SKIP(return, "Test not built with clone3 support"); + + ASSERT_EQ(0, pipe(p)); + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int listener; + + listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + close(p[1]); + close(listener); + sleep(1); + + _exit(EXIT_SUCCESS); + } + if (read(p[0], &status, 1) != 0) + _exit(EXIT_SUCCESS); + close(p[0]); + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); + EXPECT_EQ(errno, ENOENT); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + static void *do_thread(void *data) { return NULL; @@ -4755,6 +4811,83 @@ TEST(user_notification_wait_killable_fatal) EXPECT_EQ(SIGTERM, WTERMSIG(status)); } +struct tsync_vs_thread_leader_args { + pthread_t leader; +}; + +static void *tsync_vs_dead_thread_leader_sibling(void *_args) +{ + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct tsync_vs_thread_leader_args *args = _args; + void *retval; + long ret; + + ret = pthread_join(args->leader, &retval); + if (ret) + exit(1); + if (retval != _args) + exit(2); + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); + if (ret) + exit(3); + + exit(0); +} + +/* + * Ensure that a dead thread leader doesn't prevent installing new filters with + * SECCOMP_FILTER_FLAG_TSYNC from other threads. + */ +TEST(tsync_vs_dead_thread_leader) +{ + int status; + pid_t pid; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct tsync_vs_thread_leader_args *args; + pthread_t sibling; + + args = malloc(sizeof(*args)); + ASSERT_NE(NULL, args); + args->leader = pthread_self(); + + ret = pthread_create(&sibling, NULL, + tsync_vs_dead_thread_leader_sibling, args); + ASSERT_EQ(0, ret); + + /* Install a new filter just to the leader thread. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); + ASSERT_EQ(0, ret); + pthread_exit(args); + exit(1); + } + + EXPECT_EQ(pid, waitpid(pid, &status, 0)); + EXPECT_EQ(0, status); +} + /* * TODO: * - expand NNP testing diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 867f88ce2570..03b5e13b872b 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -12,7 +12,7 @@ OBJCOPY := $(CROSS_COMPILE)objcopy endif INCLUDES := -I$(top_srcdir)/tools/include -HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC $(CFLAGS) HOST_LDFLAGS := -z noexecstack -lcrypto ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \ -fno-stack-protector -mrdrnd $(INCLUDES) diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h index ea9bdf3a90b1..09da8f1011ce 100644 --- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h +++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h @@ -8,7 +8,7 @@ register unsigned long sp asm("sp"); register unsigned long sp asm("esp"); #elif __loongarch64 register unsigned long sp asm("$sp"); -#elif __ppc__ +#elif __powerpc__ register unsigned long sp asm("r1"); #elif __s390x__ register unsigned long sp asm("%15"); diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index e40dc5be2f66..d12ff955de0d 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } return 0; @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) /* Check that a child process is in the new timens. */ for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) + if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5) return pr_fail("%ld %ld\n", now.tv_sec + OFFSET, tst.tv_sec); } diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5e7f0051bd7b..5b939f59dfa4 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime"); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index 9edd43d6b2c1..a4196bbd6e33 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime(%d)", clockid); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index beb7614941fb..5b8907bf451d 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -32,7 +32,7 @@ static void *tcheck(void *_args) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) { + if (labs(tst.tv_sec - now->tv_sec) > 5) { pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", args->tst_name, tst.tv_sec, now->tv_sec); return (void *)1UL; @@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) + if (labs(tst.tv_sec - now->tv_sec) > 5) return pr_fail("%s: unexpected value: %ld (%ld)\n", tst_name, tst.tv_sec, now->tv_sec); } diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c index 4ef2184f1558..7c07edd0d450 100644 --- a/tools/testing/selftests/timers/rtcpie.c +++ b/tools/testing/selftests/timers/rtcpie.c @@ -29,7 +29,7 @@ static const char default_rtc[] = "/dev/rtc0"; int main(int argc, char **argv) { - int i, fd, retval, irqcount = 0; + int i, fd, retval; unsigned long tmp, data, old_pie_rate; const char *rtc = default_rtc; struct timeval start, end, diff; @@ -120,7 +120,6 @@ int main(int argc, char **argv) fprintf(stderr, " %d",i); fflush(stderr); - irqcount++; } /* Disable periodic interrupts */ diff --git a/tools/testing/selftests/tmpfs/Makefile b/tools/testing/selftests/tmpfs/Makefile index aa11ccc92e5b..3be931e1193f 100644 --- a/tools/testing/selftests/tmpfs/Makefile +++ b/tools/testing/selftests/tmpfs/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -CFLAGS += -D_GNU_SOURCE TEST_GEN_PROGS := TEST_GEN_PROGS += bug-link-o-tmpfile diff --git a/tools/testing/selftests/turbostat/added_perf_counters.py b/tools/testing/selftests/turbostat/added_perf_counters.py new file mode 100755 index 000000000000..9ab4aaf45fb8 --- /dev/null +++ b/tools/testing/selftests/turbostat/added_perf_counters.py @@ -0,0 +1,178 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which +from os import pread + +class PerfCounterInfo: + def __init__(self, subsys, event): + self.subsys = subsys + self.event = event + + def get_perf_event_name(self): + return f'{self.subsys}/{self.event}/' + + def get_turbostat_perf_id(self, counter_scope, counter_type, column_name): + return f'perf/{self.subsys}/{self.event},{counter_scope},{counter_type},{column_name}' + +PERF_COUNTERS_CANDIDATES = [ + PerfCounterInfo('msr', 'mperf'), + PerfCounterInfo('msr', 'aperf'), + PerfCounterInfo('msr', 'tsc'), + PerfCounterInfo('cstate_core', 'c1-residency'), + PerfCounterInfo('cstate_core', 'c6-residency'), + PerfCounterInfo('cstate_core', 'c7-residency'), + PerfCounterInfo('cstate_pkg', 'c2-residency'), + PerfCounterInfo('cstate_pkg', 'c3-residency'), + PerfCounterInfo('cstate_pkg', 'c6-residency'), + PerfCounterInfo('cstate_pkg', 'c7-residency'), + PerfCounterInfo('cstate_pkg', 'c8-residency'), + PerfCounterInfo('cstate_pkg', 'c9-residency'), + PerfCounterInfo('cstate_pkg', 'c10-residency'), +] +present_perf_counters = [] + +def check_perf_access(): + perf = which('perf') + if perf is None: + print('SKIP: Could not find perf binary, thus could not determine perf access.') + return False + + def has_perf_counter_access(counter_name): + proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'], + capture_output = True) + + if proc_perf.returncode != 0: + print(f'SKIP: Could not read {counter_name} perf counter.') + return False + + if b'<not supported>' in proc_perf.stderr: + print(f'SKIP: Could not read {counter_name} perf counter.') + return False + + return True + + for counter in PERF_COUNTERS_CANDIDATES: + if has_perf_counter_access(counter.get_perf_event_name()): + present_perf_counters.append(counter) + + if len(present_perf_counters) == 0: + print('SKIP: Could not read any perf counter.') + return False + + if len(present_perf_counters) != len(PERF_COUNTERS_CANDIDATES): + print(f'WARN: Could not access all of the counters - some will be left untested') + + return True + +if not check_perf_access(): + exit(0) + +turbostat_counter_source_opts = [''] + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +EXPECTED_COLUMNS_DEBUG_DEFAULT = [b'usec', b'Time_Of_Day_Seconds', b'APIC', b'X2APIC'] + +expected_columns = [b'CPU'] +counters_argv = [] +for counter in present_perf_counters: + if counter.subsys == 'cstate_core': + counter_scope = 'core' + elif counter.subsys == 'cstate_pkg': + counter_scope = 'package' + else: + counter_scope = 'cpu' + + counter_type = 'delta' + column_name = counter.event + + cparams = counter.get_turbostat_perf_id( + counter_scope = counter_scope, + counter_type = counter_type, + column_name = column_name + ) + expected_columns.append(column_name.encode()) + counters_argv.extend(['--add', cparams]) + +expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + expected_columns + +def gen_user_friendly_cmdline(argv_): + argv = argv_[:] + ret = '' + + while len(argv) != 0: + arg = argv.pop(0) + arg_next = '' + + if arg in ('-i', '--show', '--add'): + arg_next = argv.pop(0) if len(argv) > 0 else '' + + ret += f'{arg} {arg_next} \\\n\t' + + # Remove the last separator and return + return ret[:-4] + +# +# Run turbostat for some time and send SIGINT +# +timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s'] +turbostat_argv = [turbostat, '-i', '0.50', '--show', 'CPU'] + counters_argv + +def check_columns_or_fail(expected_columns: list, actual_columns: list): + if len(actual_columns) != len(expected_columns): + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) + + failed = False + for expected_column in expected_columns: + if expected_column not in actual_columns: + print(f'turbostat column check failed: missing column {expected_column.decode()}') + failed = True + + if failed: + exit(1) + +cmdline = gen_user_friendly_cmdline(turbostat_argv) +print(f'Running turbostat with:\n\t{cmdline}\n... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t') +check_columns_or_fail(expected_columns, actual_columns) +print('OK') + +# +# Same, but with --debug +# +# We explicitly specify '--show CPU' to make sure turbostat +# don't show a bunch of default counters instead. +# +turbostat_argv.append('--debug') + +cmdline = gen_user_friendly_cmdline(turbostat_argv) +print(f'Running turbostat (in debug mode) with:\n\t{cmdline}\n... ', end = '', flush = True) +proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t') +check_columns_or_fail(expected_columns_debug, actual_columns) +print('OK') diff --git a/tools/testing/selftests/turbostat/smi_aperf_mperf.py b/tools/testing/selftests/turbostat/smi_aperf_mperf.py new file mode 100755 index 000000000000..6289cc47d5f0 --- /dev/null +++ b/tools/testing/selftests/turbostat/smi_aperf_mperf.py @@ -0,0 +1,157 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +from shutil import which +from os import pread + +# CDLL calls dlopen underneath. +# Calling it with None (null), we get handle to the our own image (python interpreter). +# We hope to find sched_getcpu() inside ;] +# This is a bit ugly, but helps shipping working software, so.. +try: + import ctypes + + this_image = ctypes.CDLL(None) + BASE_CPU = this_image.sched_getcpu() +except: + BASE_CPU = 0 # If we fail, set to 0 and pray it's not offline. + +MSR_IA32_MPERF = 0x000000e7 +MSR_IA32_APERF = 0x000000e8 + +def check_perf_access(): + perf = which('perf') + if perf is None: + print('SKIP: Could not find perf binary, thus could not determine perf access.') + return False + + def has_perf_counter_access(counter_name): + proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'], + capture_output = True) + + if proc_perf.returncode != 0: + print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.') + return False + + if b'<not supported>' in proc_perf.stderr: + print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.') + return False + + return True + + if not has_perf_counter_access('msr/mperf/'): + return False + if not has_perf_counter_access('msr/aperf/'): + return False + if not has_perf_counter_access('msr/smi/'): + return False + + return True + +def check_msr_access(): + try: + file_msr = open(f'/dev/cpu/{BASE_CPU}/msr', 'rb') + except: + return False + + if len(pread(file_msr.fileno(), 8, MSR_IA32_MPERF)) != 8: + return False + + if len(pread(file_msr.fileno(), 8, MSR_IA32_APERF)) != 8: + return False + + return True + +has_perf_access = check_perf_access() +has_msr_access = check_msr_access() + +turbostat_counter_source_opts = [''] + +if has_msr_access: + turbostat_counter_source_opts.append('--no-perf') +else: + print('SKIP: doesn\'t have MSR access, skipping run with --no-perf') + +if has_perf_access: + turbostat_counter_source_opts.append('--no-msr') +else: + print('SKIP: doesn\'t have perf access, skipping run with --no-msr') + +if not has_msr_access and not has_perf_access: + print('SKIP: No MSR nor perf access detected. Skipping the tests entirely') + exit(0) + +turbostat = which('turbostat') +if turbostat is None: + print('Could not find turbostat binary') + exit(1) + +timeout = which('timeout') +if timeout is None: + print('Could not find timeout binary') + exit(1) + +proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True) +if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + +EXPECTED_COLUMNS_DEBUG_DEFAULT = b'usec\tTime_Of_Day_Seconds\tAPIC\tX2APIC' + +SMI_APERF_MPERF_DEPENDENT_BICS = [ + 'SMI', + 'Avg_MHz', + 'Busy%', + 'Bzy_MHz', +] +if has_perf_access: + SMI_APERF_MPERF_DEPENDENT_BICS.append('IPC') + +for bic in SMI_APERF_MPERF_DEPENDENT_BICS: + for counter_source_opt in turbostat_counter_source_opts: + + # Ugly special case, but it is what it is.. + if counter_source_opt == '--no-perf' and bic == 'IPC': + continue + + expected_columns = bic.encode() + expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + f'\t{bic}'.encode() + + # + # Run turbostat for some time and send SIGINT + # + timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s'] + turbostat_argv = [turbostat, '-i', '0.50', '--show', bic] + + if counter_source_opt: + turbostat_argv.append(counter_source_opt) + + print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) + proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) + if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + + actual_columns = proc_turbostat.stdout.split(b'\n')[0] + if expected_columns != actual_columns: + print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}') + exit(1) + print('OK') + + # + # Same, but with --debug + # + turbostat_argv.append('--debug') + + print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True) + proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True) + if proc_turbostat.returncode != 0: + print(f'turbostat failed with {proc_turbostat.returncode}') + exit(1) + + actual_columns = proc_turbostat.stdout.split(b'\n')[0] + if expected_columns_debug != actual_columns: + print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}') + exit(1) + print('OK') diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index a8dc51af5a9c..30d5c8f0e5c7 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -6,3 +6,5 @@ vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 +vdso_test_getrandom +vdso_test_chacha diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index d53a4d8008f9..3de8e7e052ae 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,35 +1,48 @@ # SPDX-License-Identifier: GPL-2.0 -include ../lib.mk - uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) -TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres +TEST_GEN_PROGS := vdso_test_gettimeofday +TEST_GEN_PROGS += vdso_test_getcpu +TEST_GEN_PROGS += vdso_test_abi +TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) -TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 +TEST_GEN_PROGS += vdso_standalone_test_x86 +endif +TEST_GEN_PROGS += vdso_test_correctness +ifeq ($(uname_M),x86_64) +TEST_GEN_PROGS += vdso_test_getrandom +ifneq ($(SODIUM),) +TEST_GEN_PROGS += vdso_test_chacha +endif endif -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness CFLAGS := -std=gnu99 -CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector -LDFLAGS_vdso_test_correctness := -ldl + ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -all: $(TEST_GEN_PROGS) +include ../lib.mk $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c + $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ - vdso_standalone_test_x86.c parse_vdso.c \ - -o $@ +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector + $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c - $(CC) $(CFLAGS) \ - vdso_test_correctness.c \ - -o $@ \ - $(LDFLAGS_vdso_test_correctness) +$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl + +$(OUTPUT)/vdso_test_getrandom: parse_vdso.c +$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ + -isystem $(top_srcdir)/include/uapi + +$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/arch/$(ARCH)/entry/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ + -isystem $(top_srcdir)/arch/$(ARCH)/include \ + -isystem $(top_srcdir)/include \ + -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ + -Wa,--noexecstack $(SODIUM) diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 413f75620a35..4ae417372e9e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -55,14 +55,20 @@ static struct vdso_info ELF(Verdef) *verdef; } vdso_info; -/* Straight from the ELF specification. */ -static unsigned long elf_hash(const unsigned char *name) +/* + * Straight from the ELF specification...and then tweaked slightly, in order to + * avoid a few clang warnings. + */ +static unsigned long elf_hash(const char *name) { unsigned long h = 0, g; - while (*name) + const unsigned char *uch_name = (const unsigned char *)name; + + while (*uch_name) { - h = (h << 4) + *name++; - if (g = h & 0xf0000000) + h = (h << 4) + *uch_name++; + g = h & 0xf0000000; + if (g) h ^= g >> 24; h &= ~g; } diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 8a44ff973ee1..27f6fdf11969 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -18,7 +18,7 @@ #include "parse_vdso.h" -/* We need a libc functions... */ +/* We need some libc functions... */ int strcmp(const char *a, const char *b) { /* This implementation is buggy: it never returns -1. */ @@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b) return 0; } +/* + * The clang build needs this, although gcc does not. + * Stolen from lib/string.c. + */ +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + /* ...and two syscalls. This is x86-specific. */ static inline long x86_syscall3(long nr, long a0, long a1, long a2) { @@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n) } } -__attribute__((externally_visible)) void c_main(void **stack) +void c_main(void **stack) { /* Parse the stack */ long argc = (long)*stack; diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c new file mode 100644 index 000000000000..e38f44e5f803 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <sodium/crypto_stream_chacha20.h> +#include <sys/random.h> +#include <string.h> +#include <stdint.h> +#include "../kselftest.h" + +extern void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint8_t *key, uint32_t *counter, size_t nblocks); + +int main(int argc, char *argv[]) +{ + enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 }; + static const uint8_t nonce[8] = { 0 }; + uint32_t counter[2]; + uint8_t key[32]; + uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS]; + + ksft_print_header(); + ksft_set_plan(1); + + for (unsigned int trial = 0; trial < TRIALS; ++trial) { + if (getrandom(key, sizeof(key), 0) != sizeof(key)) { + printf("getrandom() failed!\n"); + return KSFT_SKIP; + } + crypto_stream_chacha20(output1, sizeof(output1), nonce, key); + for (unsigned int split = 0; split < BLOCKS; ++split) { + memset(output2, 'X', sizeof(output2)); + memset(counter, 0, sizeof(counter)); + if (split) + __arch_chacha20_blocks_nostack(output2, key, counter, split); + __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split); + if (memcmp(output1, output2, sizeof(output1))) + return KSFT_FAIL; + } + } + ksft_test_result_pass("chacha: PASS\n"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c new file mode 100644 index 000000000000..05122425a873 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#include <assert.h> +#include <pthread.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <signal.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/random.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <linux/random.h> + +#include "../kselftest.h" +#include "parse_vdso.h" + +#ifndef timespecsub +#define timespecsub(tsp, usp, vsp) \ + do { \ + (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \ + (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \ + if ((vsp)->tv_nsec < 0) { \ + (vsp)->tv_sec--; \ + (vsp)->tv_nsec += 1000000000L; \ + } \ + } while (0) +#endif + +static struct { + pthread_mutex_t lock; + void **states; + size_t len, cap; +} grnd_allocator = { + .lock = PTHREAD_MUTEX_INITIALIZER +}; + +static struct { + ssize_t(*fn)(void *, size_t, unsigned long, void *, size_t); + pthread_key_t key; + pthread_once_t initialized; + struct vgetrandom_opaque_params params; +} grnd_ctx = { + .initialized = PTHREAD_ONCE_INIT +}; + +static void *vgetrandom_get_state(void) +{ + void *state = NULL; + + pthread_mutex_lock(&grnd_allocator.lock); + if (!grnd_allocator.len) { + size_t page_size = getpagesize(); + size_t new_cap; + size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */ + void *new_block, *new_states; + + alloc_size = (num * grnd_ctx.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); + num = (page_size / grnd_ctx.params.size_of_opaque_state) * (alloc_size / page_size); + new_block = mmap(0, alloc_size, grnd_ctx.params.mmap_prot, grnd_ctx.params.mmap_flags, -1, 0); + if (new_block == MAP_FAILED) + goto out; + + new_cap = grnd_allocator.cap + num; + new_states = reallocarray(grnd_allocator.states, new_cap, sizeof(*grnd_allocator.states)); + if (!new_states) + goto unmap; + grnd_allocator.cap = new_cap; + grnd_allocator.states = new_states; + + for (size_t i = 0; i < num; ++i) { + if (((uintptr_t)new_block & (page_size - 1)) + grnd_ctx.params.size_of_opaque_state > page_size) + new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1))); + grnd_allocator.states[i] = new_block; + new_block += grnd_ctx.params.size_of_opaque_state; + } + grnd_allocator.len = num; + goto success; + + unmap: + munmap(new_block, alloc_size); + goto out; + } +success: + state = grnd_allocator.states[--grnd_allocator.len]; + +out: + pthread_mutex_unlock(&grnd_allocator.lock); + return state; +} + +static void vgetrandom_put_state(void *state) +{ + if (!state) + return; + pthread_mutex_lock(&grnd_allocator.lock); + grnd_allocator.states[grnd_allocator.len++] = state; + pthread_mutex_unlock(&grnd_allocator.lock); +} + +static void vgetrandom_init(void) +{ + if (pthread_key_create(&grnd_ctx.key, vgetrandom_put_state) != 0) + return; + unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + if (!sysinfo_ehdr) { + printf("AT_SYSINFO_EHDR is not present!\n"); + exit(KSFT_SKIP); + } + vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); + grnd_ctx.fn = (__typeof__(grnd_ctx.fn))vdso_sym("LINUX_2.6", "__vdso_getrandom"); + if (!grnd_ctx.fn) { + printf("__vdso_getrandom is missing!\n"); + exit(KSFT_FAIL); + } + if (grnd_ctx.fn(NULL, 0, 0, &grnd_ctx.params, ~0UL) != 0) { + printf("failed to fetch vgetrandom params!\n"); + exit(KSFT_FAIL); + } +} + +static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) +{ + void *state; + + pthread_once(&grnd_ctx.initialized, vgetrandom_init); + state = pthread_getspecific(grnd_ctx.key); + if (!state) { + state = vgetrandom_get_state(); + if (pthread_setspecific(grnd_ctx.key, state) != 0) { + vgetrandom_put_state(state); + state = NULL; + } + if (!state) { + printf("vgetrandom_get_state failed!\n"); + exit(KSFT_FAIL); + } + } + return grnd_ctx.fn(buf, len, flags, state, grnd_ctx.params.size_of_opaque_state); +} + +enum { TRIALS = 25000000, THREADS = 256 }; + +static void *test_vdso_getrandom(void *) +{ + for (size_t i = 0; i < TRIALS; ++i) { + unsigned int val; + ssize_t ret = vgetrandom(&val, sizeof(val), 0); + assert(ret == sizeof(val)); + } + return NULL; +} + +static void *test_libc_getrandom(void *) +{ + for (size_t i = 0; i < TRIALS; ++i) { + unsigned int val; + ssize_t ret = getrandom(&val, sizeof(val), 0); + assert(ret == sizeof(val)); + } + return NULL; +} + +static void *test_syscall_getrandom(void *) +{ + for (size_t i = 0; i < TRIALS; ++i) { + unsigned int val; + ssize_t ret = syscall(__NR_getrandom, &val, sizeof(val), 0); + assert(ret == sizeof(val)); + } + return NULL; +} + +static void bench_single(void) +{ + struct timespec start, end, diff; + + clock_gettime(CLOCK_MONOTONIC, &start); + test_vdso_getrandom(NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" vdso: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + test_libc_getrandom(NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" libc: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + test_syscall_getrandom(NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf("syscall: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec); +} + +static void bench_multi(void) +{ + struct timespec start, end, diff; + pthread_t threads[THREADS]; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (size_t i = 0; i < THREADS; ++i) + assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); + for (size_t i = 0; i < THREADS; ++i) + pthread_join(threads[i], NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" vdso: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (size_t i = 0; i < THREADS; ++i) + assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); + for (size_t i = 0; i < THREADS; ++i) + pthread_join(threads[i], NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" libc: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (size_t i = 0; i < THREADS; ++i) + assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); + for (size_t i = 0; i < THREADS; ++i) + pthread_join(threads[i], NULL); + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &diff); + printf(" syscall: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec); +} + +static void fill(void) +{ + uint8_t weird_size[323929]; + for (;;) + vgetrandom(weird_size, sizeof(weird_size), 0); +} + +static void kselftest(void) +{ + uint8_t weird_size[1263]; + + ksft_print_header(); + ksft_set_plan(1); + + for (size_t i = 0; i < 1000; ++i) { + ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); + if (ret != sizeof(weird_size)) + exit(KSFT_FAIL); + } + + ksft_test_result_pass("getrandom: PASS\n"); + exit(KSFT_PASS); +} + +static void usage(const char *argv0) +{ + fprintf(stderr, "Usage: %s [bench-single|bench-multi|fill]\n", argv0); +} + +int main(int argc, char *argv[]) +{ + if (argc == 1) { + kselftest(); + return 0; + } + + if (argc != 2) { + usage(argv[0]); + return 1; + } + if (!strcmp(argv[1], "bench-single")) + bench_single(); + else if (!strcmp(argv[1], "bench-multi")) + bench_multi(); + else if (!strcmp(argv[1], "fill")) + fill(); + else { + usage(argv[0]); + return 1; + } + return 0; +} diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e95bd56b332f..35856b11c143 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu max -machine microvm -no-acpi +QEMU_MACHINE := -cpu max -machine microvm,acpi=off endif else ifeq ($(ARCH),i686) CHOST := i686-linux-musl @@ -120,9 +120,9 @@ KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi +QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off endif else ifeq ($(ARCH),mips64) CHOST := mips64-linux-musl diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 0b872c0a42d2..5c8757a25998 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -40,6 +40,13 @@ CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES) # call32_from_64 in thunks.S uses absolute addresses. ifeq ($(CAN_BUILD_WITH_NOPIE),1) CFLAGS += -no-pie + +ifneq ($(LLVM),) +# clang only wants to see -no-pie during linking. Here, we don't have a separate +# linking stage, so a compiler warning is unavoidable without (wastefully) +# restructuring the Makefile. Avoid this by simply disabling that warning. +CFLAGS += -Wno-unused-command-line-argument +endif endif define gen-target-rule-32 @@ -73,10 +80,10 @@ all_64: $(BINARIES_64) EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) $(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h - $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm $(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h - $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl # x86_64 users should be encouraged to install 32-bit libraries ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) @@ -100,10 +107,22 @@ warn_32bit_failure: exit 0; endif -# Some tests have additional dependencies. -$(OUTPUT)/sysret_ss_attrs_64: thunks.S -$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S -$(OUTPUT)/test_syscall_vdso_32: thunks_32.S +# Add an additional file to the source file list for a given target, and also +# add a Makefile dependency on that same file. However, do these separately, so +# that the compiler invocation ("$(CC) file1.c file2.S") is not combined with +# the dependencies ("header3.h"), because clang, unlike gcc, will not accept +# header files as an input to the compiler invocation. +define extra-files +$(OUTPUT)/$(1): EXTRA_FILES := $(2) +$(OUTPUT)/$(1): $(2) +endef + +$(eval $(call extra-files,sysret_ss_attrs_64,thunks.S)) +$(eval $(call extra-files,ptrace_syscall_32,raw_syscall_helper_32.S)) +$(eval $(call extra-files,test_syscall_vdso_32,thunks_32.S)) +$(eval $(call extra-files,fsgsbase_restore_64,clang_helpers_64.S)) +$(eval $(call extra-files,fsgsbase_restore_32,clang_helpers_32.S)) +$(eval $(call extra-files,sysret_rip_64,clang_helpers_64.S)) # check_initial_reg_state is special: it needs a custom entry, and it # needs to be static so that its interpreter doesn't destroy its initial diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 95aad6d8849b..1fdf35a4d7f6 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -39,16 +39,6 @@ struct xsave_buffer { }; }; -static inline uint64_t xgetbv(uint32_t index) -{ - uint32_t eax, edx; - - asm volatile("xgetbv;" - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((uint64_t)edx << 32); -} - static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) { uint32_t rfbm_lo = rfbm; @@ -164,12 +154,6 @@ static inline void clear_xstate_header(struct xsave_buffer *buffer) memset(&buffer->header, 0, sizeof(buffer->header)); } -static inline uint64_t get_xstatebv(struct xsave_buffer *buffer) -{ - /* XSTATE_BV is at the beginning of the header: */ - return *(uint64_t *)&buffer->header; -} - static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) { /* XSTATE_BV is at the beginning of the header: */ diff --git a/tools/testing/selftests/x86/clang_helpers_32.S b/tools/testing/selftests/x86/clang_helpers_32.S new file mode 100644 index 000000000000..dc16271bac70 --- /dev/null +++ b/tools/testing/selftests/x86/clang_helpers_32.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 32-bit assembly helpers for asm operations that lack support in both gcc and + * clang. For example, clang asm does not support segment prefixes. + */ +.global dereference_seg_base +dereference_seg_base: + mov %fs:(0), %eax + ret + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/clang_helpers_64.S b/tools/testing/selftests/x86/clang_helpers_64.S new file mode 100644 index 000000000000..185a69dbf39c --- /dev/null +++ b/tools/testing/selftests/x86/clang_helpers_64.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 64-bit assembly helpers for asm operations that lack support in both gcc and + * clang. For example, clang asm does not support segment prefixes. + */ +.global dereference_seg_base + +dereference_seg_base: + mov %gs:(0), %rax + ret + +.global test_page +.global test_syscall_insn + +.pushsection ".text", "ax" +.balign 4096 +test_page: .globl test_page + .fill 4094,1,0xcc + +test_syscall_insn: + syscall + +.ifne . - test_page - 4096 + .error "test page is not one page long" +.endif +.popsection + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 8c780cce941d..50cf32de6313 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -109,11 +109,6 @@ static inline void wrgsbase(unsigned long gsbase) asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); } -static inline void wrfsbase(unsigned long fsbase) -{ - asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); -} - enum which_base { FS, GS }; static unsigned long read_base(enum which_base which) @@ -212,7 +207,6 @@ static void mov_0_gs(unsigned long initial_base, bool schedule) } static volatile unsigned long remote_base; -static volatile bool remote_hard_zero; static volatile unsigned int ftx; /* diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c index 6fffadc51579..224058c1e4b2 100644 --- a/tools/testing/selftests/x86/fsgsbase_restore.c +++ b/tools/testing/selftests/x86/fsgsbase_restore.c @@ -39,12 +39,11 @@ # define SEG "%fs" #endif -static unsigned int dereference_seg_base(void) -{ - int ret; - asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret)); - return ret; -} +/* + * Defined in clang_helpers_[32|64].S, because unlike gcc, clang inline asm does + * not support segmentation prefixes. + */ +unsigned int dereference_seg_base(void); static void init_seg(void) { diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 5d7961a5f7f6..0b75b29f794b 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -487,7 +487,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; if (asm_ss != sig_ss && sig == SIGTRAP) { /* Sanity check failure. */ - printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", + printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %x, ax = %llx\n", ss, *ssptr(ctx), (unsigned long long)asm_ss); nerrs++; } diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c index 461fa41a4d02..48ab065a76f9 100644 --- a/tools/testing/selftests/x86/syscall_arg_fault.c +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -29,7 +29,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), err(1, "sigaction"); } -static volatile sig_atomic_t sig_traps; static sigjmp_buf jmpbuf; static volatile sig_atomic_t n_errs; diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c index 84d74be1d902..b30de9aaa6d4 100644 --- a/tools/testing/selftests/x86/sysret_rip.c +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -22,21 +22,13 @@ #include <sys/mman.h> #include <assert.h> - -asm ( - ".pushsection \".text\", \"ax\"\n\t" - ".balign 4096\n\t" - "test_page: .globl test_page\n\t" - ".fill 4094,1,0xcc\n\t" - "test_syscall_insn:\n\t" - "syscall\n\t" - ".ifne . - test_page - 4096\n\t" - ".error \"test page is not one page long\"\n\t" - ".endif\n\t" - ".popsection" - ); - +/* + * These items are in clang_helpers_64.S, in order to avoid clang inline asm + * limitations: + */ +void test_syscall_ins(void); extern const char test_page[]; + static void const *current_test_page_addr = test_page; static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c index 09789c0ce3e9..b9ae9d8cebcb 100644 --- a/tools/testing/selftests/x86/test_FISTTP.c +++ b/tools/testing/selftests/x86/test_FISTTP.c @@ -25,7 +25,7 @@ int test(void) feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); asm volatile ("\n" " fld1""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fld1""\n" " fisttpl res32""\n" " fld1""\n" @@ -45,7 +45,7 @@ int test(void) feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); asm volatile ("\n" " fldpi""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fldpi""\n" " fisttpl res32""\n" " fldpi""\n" @@ -66,7 +66,7 @@ int test(void) asm volatile ("\n" " fldpi""\n" " fchs""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fldpi""\n" " fchs""\n" " fisttpl res32""\n" @@ -88,7 +88,7 @@ int test(void) feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); asm volatile ("\n" " fldln2""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fldln2""\n" " fisttpl res32""\n" " fldln2""\n" diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c index ee909a7927f9..21af54d5f4ea 100644 --- a/tools/testing/selftests/x86/test_shadow_stack.c +++ b/tools/testing/selftests/x86/test_shadow_stack.c @@ -34,6 +34,7 @@ #include <sys/ptrace.h> #include <sys/signal.h> #include <linux/elf.h> +#include <linux/perf_event.h> /* * Define the ABI defines if needed, so people can run the tests @@ -734,6 +735,144 @@ int test_32bit(void) return !segv_triggered; } +static int parse_uint_from_file(const char *file, const char *fmt) +{ + int err, ret; + FILE *f; + + f = fopen(file, "re"); + if (!f) { + err = -errno; + printf("failed to open '%s': %d\n", file, err); + return err; + } + err = fscanf(f, fmt, &ret); + if (err != 1) { + err = err == EOF ? -EIO : -errno; + printf("failed to parse '%s': %d\n", file, err); + fclose(f); + return err; + } + fclose(f); + return ret; +} + +static int determine_uprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_uprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static ssize_t get_uprobe_offset(const void *addr) +{ + size_t start, end, base; + char buf[256]; + bool found = false; + FILE *f; + + f = fopen("/proc/self/maps", "r"); + if (!f) + return -errno; + + while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { + if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { + found = true; + break; + } + } + + fclose(f); + + if (!found) + return -ESRCH; + + return (uintptr_t)addr - start + base; +} + +static __attribute__((noinline)) void uretprobe_trigger(void) +{ + asm volatile (""); +} + +/* + * This test setups return uprobe, which is sensitive to shadow stack + * (crashes without extra fix). After executing the uretprobe we fail + * the test if we receive SIGSEGV, no crash means we're good. + * + * Helper functions above borrowed from bpf selftests. + */ +static int test_uretprobe(void) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + const char *file = "/proc/self/exe"; + int bit, fd = 0, type, err = 1; + struct perf_event_attr attr; + struct sigaction sa = {}; + ssize_t offset; + + type = determine_uprobe_perf_type(); + if (type < 0) { + if (type == -ENOENT) + printf("[SKIP]\tUretprobe test, uprobes are not available\n"); + return 0; + } + + offset = get_uprobe_offset(uretprobe_trigger); + if (offset < 0) + return 1; + + bit = determine_uprobe_retprobe_bit(); + if (bit < 0) + return 1; + + sa.sa_sigaction = segv_gp_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + /* Setup return uprobe through perf event interface. */ + memset(&attr, 0, attr_sz); + attr.size = attr_sz; + attr.type = type; + attr.config = 1 << bit; + attr.config1 = (__u64) (unsigned long) file; + attr.config2 = offset; + + fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, -1 /* cpu */, + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); + if (fd < 0) + goto out; + + if (sigsetjmp(jmp_buffer, 1)) + goto out; + + ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK); + + /* + * This either segfaults and goes through sigsetjmp above + * or succeeds and we're good. + */ + uretprobe_trigger(); + + printf("[OK]\tUretprobe test\n"); + err = 0; + +out: + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); + signal(SIGSEGV, SIG_DFL); + if (fd) + close(fd); + return err; +} + void segv_handler_ptrace(int signum, siginfo_t *si, void *uc) { /* The SSP adjustment caused a segfault. */ @@ -926,6 +1065,12 @@ int main(int argc, char *argv[]) goto out; } + if (test_uretprobe()) { + ret = 1; + printf("[FAIL]\turetprobe test\n"); + goto out; + } + return ret; out: diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index d4c8e8d79d38..6de11b4df458 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -97,11 +97,6 @@ static inline long sys_gtod(struct timeval *tv, struct timezone *tz) return syscall(SYS_gettimeofday, tv, tz); } -static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) -{ - return syscall(SYS_clock_gettime, id, ts); -} - static inline long sys_time(time_t *t) { return syscall(SYS_time, t); @@ -252,7 +247,7 @@ static void test_getcpu(int cpu) if (ret_sys == 0) { if (cpu_sys != cpu) - ksft_print_msg("syscall reported CPU %hu but should be %d\n", + ksft_print_msg("syscall reported CPU %u but should be %d\n", cpu_sys, cpu); have_node = true; @@ -270,10 +265,10 @@ static void test_getcpu(int cpu) if (cpu_vdso != cpu || node_vdso != node) { if (cpu_vdso != cpu) - ksft_print_msg("vDSO reported CPU %hu but should be %d\n", + ksft_print_msg("vDSO reported CPU %u but should be %d\n", cpu_vdso, cpu); if (node_vdso != node) - ksft_print_msg("vDSO reported node %hu but should be %hu\n", + ksft_print_msg("vDSO reported node %u but should be %u\n", node_vdso, node); ksft_test_result_fail("Wrong values\n"); } else { @@ -295,10 +290,10 @@ static void test_getcpu(int cpu) if (cpu_vsys != cpu || node_vsys != node) { if (cpu_vsys != cpu) - ksft_print_msg("vsyscall reported CPU %hu but should be %d\n", + ksft_print_msg("vsyscall reported CPU %u but should be %d\n", cpu_vsys, cpu); if (node_vsys != node) - ksft_print_msg("vsyscall reported node %hu but should be %hu\n", + ksft_print_msg("vsyscall reported node %u but should be %u\n", node_vsys, node); ksft_test_result_fail("Wrong values\n"); } else { diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c index fe99f2434155..ac8d8e1e9805 100644 --- a/tools/testing/selftests/x86/vdso_restorer.c +++ b/tools/testing/selftests/x86/vdso_restorer.c @@ -92,4 +92,6 @@ int main() printf("[FAIL]\t!SA_SIGINFO handler was not called\n"); nerrs++; } + + return nerrs; } diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index a7f56a09ca9f..6e0b4e95e230 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -13,3 +13,16 @@ CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-p clean: ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf vsock_uring_test -include *.d + +VSOCK_INSTALL_PATH ?= + +install: all +ifdef VSOCK_INSTALL_PATH + mkdir -p $(VSOCK_INSTALL_PATH) + install -m 744 vsock_test $(VSOCK_INSTALL_PATH) + install -m 744 vsock_perf $(VSOCK_INSTALL_PATH) + install -m 744 vsock_diag_test $(VSOCK_INSTALL_PATH) + install -m 744 vsock_uring_test $(VSOCK_INSTALL_PATH) +else + $(error Error: set VSOCK_INSTALL_PATH to use install) +endif diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 7be17d09f7e8..214e2c93fde0 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -374,6 +374,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too { struct osnoise_hist_data *data = tool->data; struct trace_instance *trace = &tool->trace; + int has_samples = 0; int bucket, cpu; int total; @@ -402,11 +403,25 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too continue; } + /* There are samples above the threshold */ + has_samples = 1; trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); } + /* + * If no samples were recorded, skip calculations, print zeroed statistics + * and return. + */ + if (!has_samples) { + trace_seq_reset(trace->seq); + trace_seq_printf(trace->seq, "over: 0\ncount: 0\nmin: 0\navg: 0\nmax: 0\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + return; + } + if (!params->no_index) trace_seq_printf(trace->seq, "over: "); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 07ba55d4ec06..f594a44df840 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -42,6 +42,7 @@ struct osnoise_top_params { int hk_cpus; int warmup; int buffer_size; + int pretty_output; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; @@ -163,7 +164,9 @@ static void osnoise_top_header(struct osnoise_tool *top) get_duration(top->start_time, duration, sizeof(duration)); - trace_seq_printf(s, "\033[2;37;40m"); + if (params->pretty_output) + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " "); if (params->mode == MODE_OSNOISE) { @@ -174,12 +177,16 @@ static void osnoise_top_header(struct osnoise_tool *top) } trace_seq_printf(s, " "); - trace_seq_printf(s, "\033[0;0;0m"); + + if (params->pretty_output) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); trace_seq_printf(s, "duration: %9s | time is in us\n", duration); - trace_seq_printf(s, "\033[2;30;47m"); + if (params->pretty_output) + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); trace_seq_printf(s, " Noise "); trace_seq_printf(s, " %% CPU Aval "); @@ -192,7 +199,8 @@ static void osnoise_top_header(struct osnoise_tool *top) trace_seq_printf(s, " IRQ Softirq Thread"); eol: - trace_seq_printf(s, "\033[0;0;0m"); + if (params->pretty_output) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); } @@ -619,6 +627,9 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p auto_house_keeping(¶ms->monitored_cpus); } + if (isatty(1) && !params->quiet) + params->pretty_output = 1; + return 0; out_err: diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 98ff808d6f0c..43d3a6aa1dcf 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -139,7 +139,7 @@ static int parallel_test(u64 features, bool fast_vringh) { void *host_map, *guest_map; - int fd, mapsize, to_guest[2], to_host[2]; + int pipe_ret, fd, mapsize, to_guest[2], to_host[2]; unsigned long xfers = 0, notifies = 0, receives = 0; unsigned int first_cpu, last_cpu; cpu_set_t cpu_set; @@ -161,8 +161,11 @@ static int parallel_test(u64 features, host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - pipe(to_guest); - pipe(to_host); + pipe_ret = pipe(to_guest); + assert(!pipe_ret); + + pipe_ret = pipe(to_host); + assert(!pipe_ret); CPU_ZERO(&cpu_set); find_cpus(&first_cpu, &last_cpu); |