diff options
Diffstat (limited to 'tools')
116 files changed, 4991 insertions, 419 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 16e006f708ca..4602464ebdfb 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -125,6 +126,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 4e76630dd655..97c3478ee6e7 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -154,6 +155,18 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 serror_pending; + __u8 serror_has_esr; + /* Align it to 8 bytes */ + __u8 pad[6]; + __u64 serror_esr; + } exception; + __u32 reserved[12]; +}; + /* If you need to interpret the index values, here is the key: */ #define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000 #define KVM_REG_ARM_COPROC_SHIFT 16 diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 4cdaa55fabfe..9a50f02b9894 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -4,7 +4,7 @@ /* * KVM s390 specific structures and definitions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008, 2018 * * Author(s): Carsten Otte <cotte@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com> @@ -225,6 +225,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_FPRS (1UL << 8) #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) +#define KVM_SYNC_ETOKEN (1UL << 11) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 #define SDNXL (1UL << SDNXC) @@ -258,6 +259,8 @@ struct kvm_sync_regs { struct { __u64 reserved1[2]; __u64 gscb[4]; + __u64 etoken; + __u64 etoken_extension; }; }; }; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index c535c2fdea13..86299efa804a 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -378,4 +378,41 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_STATE_NESTED_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_RUN_PENDING 0x00000002 + +#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 +#define KVM_STATE_NESTED_SMM_VMXON 0x00000002 + +struct kvm_vmx_nested_state { + __u64 vmxon_pa; + __u64 vmcs_pa; + + struct { + __u16 flags; + } smm; +}; + +/* for KVM_CAP_NESTED_STATE */ +struct kvm_nested_state { + /* KVM_STATE_* flags */ + __u16 flags; + + /* 0 for VMX, 1 for SVM. */ + __u16 format; + + /* 128 for SVM, 128 + VMCS size for VMX. */ + __u32 size; + + union { + /* VMXON, VMCS */ + struct kvm_vmx_nested_state vmx; + + /* Pad the header to 128 bytes. */ + __u8 pad[120]; + }; + + __u8 data[0]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst new file mode 100644 index 000000000000..408ec30d8872 --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -0,0 +1,139 @@ +================ +bpftool-net +================ +------------------------------------------------------------------------------- +tool for inspection of netdev/tc related bpf prog attachments +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **net** *COMMAND* + + *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + + *COMMANDS* := + { **show** | **list** } [ **dev** name ] | **help** + +NET COMMANDS +============ + +| **bpftool** **net { show | list } [ dev name ]** +| **bpftool** **net help** + +DESCRIPTION +=========== + **bpftool net { show | list } [ dev name ]** + List bpf program attachments in the kernel networking subsystem. + + Currently, only device driver xdp attachments and tc filter + classification/action attachments are implemented, i.e., for + program types **BPF_PROG_TYPE_SCHED_CLS**, + **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + users can use **bpftool cgroup** to dump cgroup attachments. + For sk_{filter, skb, msg, reuseport} and lwt/seg6 + bpf programs, users should consult other tools, e.g., iproute2. + + The current output will start with all xdp program attachments, followed by + all tc class/qdisc bpf program attachments. Both xdp programs and + tc programs are ordered based on ifindex number. If multiple bpf + programs attached to the same networking device through **tc filter**, + the order will be first all bpf programs attached to tc classes, then + all bpf programs attached to non clsact qdiscs, and finally all + bpf programs attached to root and clsact qdisc. + + **bpftool net help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -v, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, this + option has no effect. + + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + +EXAMPLES +======== + +| **# bpftool net** + +:: + + xdp: + eth0(2) driver id 198 + + tc: + eth0(2) htb name prefix_matcher.o:[cls_prefix_matcher_htb] id 111727 act [] + eth0(2) clsact/ingress fbflow_icmp id 130246 act [] + eth0(2) clsact/egress prefix_matcher.o:[cls_prefix_matcher_clsact] id 111726 + eth0(2) clsact/egress cls_fg_dscp id 108619 act [] + eth0(2) clsact/egress fbflow_egress id 130245 + +| +| **# bpftool -jp net** + +:: + + [{ + "xdp": [{ + "devname": "eth0", + "ifindex": 2, + "mode": "driver", + "id": 198 + } + ], + "tc": [{ + "devname": "eth0", + "ifindex": 2, + "kind": "htb", + "name": "prefix_matcher.o:[cls_prefix_matcher_htb]", + "id": 111727, + "act": [] + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/ingress", + "name": "fbflow_icmp", + "id": 130246, + "act": [] + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/egress", + "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]", + "id": 111726, + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/egress", + "name": "cls_fg_dscp", + "id": 108619, + "act": [] + },{ + "devname": "eth0", + "ifindex": 2, + "kind": "clsact/egress", + "name": "fbflow_egress", + "id": 130245, + } + ] + } + ] + + +SEE ALSO +======== + **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index b6f5d560460d..8dda77daeda9 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -16,7 +16,7 @@ SYNOPSIS **bpftool** **version** - *OBJECT* := { **map** | **program** | **cgroup** | **perf** } + *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** } *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } | { **-j** | **--json** } [{ **-p** | **--pretty** }] } @@ -32,6 +32,8 @@ SYNOPSIS *PERF-COMMANDS* := { **show** | **list** | **help** } + *NET-COMMANDS* := { **show** | **list** | **help** } + DESCRIPTION =========== *bpftool* allows for inspection and simple modification of BPF objects @@ -58,4 +60,4 @@ OPTIONS SEE ALSO ======== **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8) - **bpftool-perf**\ (8) + **bpftool-perf**\ (8), **bpftool-net**\ (8) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 598066c40191..df1060b852c1 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -494,10 +494,10 @@ _bpftool() _filedir return 0 ;; - tree) - _filedir - return 0 - ;; + tree) + _filedir + return 0 + ;; attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ device bind4 bind6 post_bind4 post_bind6 connect4 \ @@ -552,6 +552,15 @@ _bpftool() ;; esac ;; + net) + case $command in + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'help \ + show list' -- "$cur" ) ) + ;; + esac + ;; esac } && complete -F _bpftool bpftool diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d15a62be6cf0..79dc3f193547 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -85,7 +85,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf }\n" + " OBJECT := { prog | map | cgroup | perf | net }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -215,6 +215,7 @@ static const struct cmd cmds[] = { { "map", do_map }, { "cgroup", do_cgroup }, { "perf", do_perf }, + { "net", do_net }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 238e734d75b3..40492cdc4e53 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -136,6 +136,7 @@ int do_map(int argc, char **arg); int do_event_pipe(int argc, char **argv); int do_cgroup(int argc, char **arg); int do_perf(int argc, char **arg); +int do_net(int argc, char **arg); int prog_parse_fd(int *argc, char ***argv); int map_parse_fd(int *argc, char ***argv); @@ -165,4 +166,11 @@ struct btf_dumper { */ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id, const void *data); + +struct nlattr; +struct ifinfomsg; +struct tcmsg; +int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb); +int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind, + const char *devname, int ifindex); #endif diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 9c55077ca5dd..e22fbe8b975f 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -71,6 +71,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_XSKMAP] = "xskmap", [BPF_MAP_TYPE_SOCKHASH] = "sockhash", [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage", + [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray", }; static bool map_is_per_cpu(__u32 type) @@ -673,12 +674,6 @@ static int do_dump(int argc, char **argv) if (fd < 0) return -1; - if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) { - p_err("Dumping maps of maps and program maps not supported"); - close(fd); - return -1; - } - key = malloc(info.key_size); value = alloc_value(&info); if (!key || !value) { @@ -732,7 +727,9 @@ static int do_dump(int argc, char **argv) } else { print_entry_plain(&info, key, value); } - } else { + num_elems++; + } else if (!map_is_map_of_maps(info.type) && + !map_is_map_of_progs(info.type)) { if (json_output) { jsonw_name(json_wtr, "key"); print_hex_data_json(key, info.key_size); @@ -749,7 +746,6 @@ static int do_dump(int argc, char **argv) } prev_key = key; - num_elems++; } if (json_output) diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c new file mode 100644 index 000000000000..ed205ee57655 --- /dev/null +++ b/tools/bpf/bpftool/net.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#define _GNU_SOURCE +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <libbpf.h> +#include <net/if.h> +#include <linux/if.h> +#include <linux/rtnetlink.h> +#include <linux/tc_act/tc_bpf.h> +#include <sys/socket.h> + +#include <bpf.h> +#include <nlattr.h> +#include "main.h" +#include "netlink_dumper.h" + +struct ip_devname_ifindex { + char devname[64]; + int ifindex; +}; + +struct bpf_netdev_t { + struct ip_devname_ifindex *devices; + int used_len; + int array_len; + int filter_idx; +}; + +struct tc_kind_handle { + char kind[64]; + int handle; +}; + +struct bpf_tcinfo_t { + struct tc_kind_handle *handle_array; + int used_len; + int array_len; + bool is_qdisc; +}; + +struct bpf_filter_t { + const char *kind; + const char *devname; + int ifindex; +}; + +static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + struct bpf_netdev_t *netinfo = cookie; + struct ifinfomsg *ifinfo = msg; + + if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) + return 0; + + if (netinfo->used_len == netinfo->array_len) { + netinfo->devices = realloc(netinfo->devices, + (netinfo->array_len + 16) * + sizeof(struct ip_devname_ifindex)); + if (!netinfo->devices) + return -ENOMEM; + + netinfo->array_len += 16; + } + netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; + snprintf(netinfo->devices[netinfo->used_len].devname, + sizeof(netinfo->devices[netinfo->used_len].devname), + "%s", + tb[IFLA_IFNAME] ? nla_getattr_str(tb[IFLA_IFNAME]) : ""); + netinfo->used_len++; + + return do_xdp_dump(ifinfo, tb); +} + +static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + struct bpf_tcinfo_t *tcinfo = cookie; + struct tcmsg *info = msg; + + if (tcinfo->is_qdisc) { + /* skip clsact qdisc */ + if (tb[TCA_KIND] && + strcmp(nla_data(tb[TCA_KIND]), "clsact") == 0) + return 0; + if (info->tcm_handle == 0) + return 0; + } + + if (tcinfo->used_len == tcinfo->array_len) { + tcinfo->handle_array = realloc(tcinfo->handle_array, + (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); + if (!tcinfo->handle_array) + return -ENOMEM; + + tcinfo->array_len += 16; + } + tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; + snprintf(tcinfo->handle_array[tcinfo->used_len].kind, + sizeof(tcinfo->handle_array[tcinfo->used_len].kind), + "%s", + tb[TCA_KIND] ? nla_getattr_str(tb[TCA_KIND]) : "unknown"); + tcinfo->used_len++; + + return 0; +} + +static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb) +{ + const struct bpf_filter_t *filter_info = cookie; + + return do_filter_dump((struct tcmsg *)msg, tb, filter_info->kind, + filter_info->devname, filter_info->ifindex); +} + +static int show_dev_tc_bpf(int sock, unsigned int nl_pid, + struct ip_devname_ifindex *dev) +{ + struct bpf_filter_t filter_info; + struct bpf_tcinfo_t tcinfo; + int i, handle, ret = 0; + + tcinfo.handle_array = NULL; + tcinfo.used_len = 0; + tcinfo.array_len = 0; + + tcinfo.is_qdisc = false; + ret = nl_get_class(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg, + &tcinfo); + if (ret) + goto out; + + tcinfo.is_qdisc = true; + ret = nl_get_qdisc(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg, + &tcinfo); + if (ret) + goto out; + + filter_info.devname = dev->devname; + filter_info.ifindex = dev->ifindex; + for (i = 0; i < tcinfo.used_len; i++) { + filter_info.kind = tcinfo.handle_array[i].kind; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, + tcinfo.handle_array[i].handle, + dump_filter_nlmsg, + &filter_info); + if (ret) + goto out; + } + + /* root, ingress and egress handle */ + handle = TC_H_ROOT; + filter_info.kind = "root"; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + + handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + filter_info.kind = "clsact/ingress"; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + + handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS); + filter_info.kind = "clsact/egress"; + ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle, + dump_filter_nlmsg, &filter_info); + if (ret) + goto out; + +out: + free(tcinfo.handle_array); + return 0; +} + +static int do_show(int argc, char **argv) +{ + int i, sock, ret, filter_idx = -1; + struct bpf_netdev_t dev_array; + unsigned int nl_pid; + char err_buf[256]; + + if (argc == 2) { + if (strcmp(argv[0], "dev") != 0) + usage(); + filter_idx = if_nametoindex(argv[1]); + if (filter_idx == 0) { + fprintf(stderr, "invalid dev name %s\n", argv[1]); + return -1; + } + } else if (argc != 0) { + usage(); + } + + sock = bpf_netlink_open(&nl_pid); + if (sock < 0) { + fprintf(stderr, "failed to open netlink sock\n"); + return -1; + } + + dev_array.devices = NULL; + dev_array.used_len = 0; + dev_array.array_len = 0; + dev_array.filter_idx = filter_idx; + + if (json_output) + jsonw_start_array(json_wtr); + NET_START_OBJECT; + NET_START_ARRAY("xdp", "%s:\n"); + ret = nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array); + NET_END_ARRAY("\n"); + + if (!ret) { + NET_START_ARRAY("tc", "%s:\n"); + for (i = 0; i < dev_array.used_len; i++) { + ret = show_dev_tc_bpf(sock, nl_pid, + &dev_array.devices[i]); + if (ret) + break; + } + NET_END_ARRAY("\n"); + } + NET_END_OBJECT; + if (json_output) + jsonw_end_array(json_wtr); + + if (ret) { + if (json_output) + jsonw_null(json_wtr); + libbpf_strerror(ret, err_buf, sizeof(err_buf)); + fprintf(stderr, "Error: %s\n", err_buf); + } + free(dev_array.devices); + close(sock); + return ret; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %s %s { show | list } [dev <devname>]\n" + " %s %s help\n" + "Note: Only xdp and tc attachments are supported now.\n" + " For progs attached to cgroups, use \"bpftool cgroup\"\n" + " to dump program attachments. For program types\n" + " sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n" + " consult iproute2.\n", + bin_name, argv[-2], bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "show", do_show }, + { "list", do_show }, + { "help", do_help }, + { 0 } +}; + +int do_net(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c new file mode 100644 index 000000000000..6f5e9cc6836c --- /dev/null +++ b/tools/bpf/bpftool/netlink_dumper.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#include <stdlib.h> +#include <string.h> +#include <libbpf.h> +#include <linux/rtnetlink.h> +#include <linux/tc_act/tc_bpf.h> + +#include <nlattr.h> +#include "main.h" +#include "netlink_dumper.h" + +static void xdp_dump_prog_id(struct nlattr **tb, int attr, + const char *mode, + bool new_json_object) +{ + if (!tb[attr]) + return; + + if (new_json_object) + NET_START_OBJECT + NET_DUMP_STR("mode", " %s", mode); + NET_DUMP_UINT("id", " id %u", nla_getattr_u32(tb[attr])) + if (new_json_object) + NET_END_OBJECT +} + +static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex, + const char *name) +{ + struct nlattr *tb[IFLA_XDP_MAX + 1]; + unsigned char mode; + + if (nla_parse_nested(tb, IFLA_XDP_MAX, attr, NULL) < 0) + return -1; + + if (!tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = nla_getattr_u8(tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + NET_START_OBJECT; + if (name) + NET_DUMP_STR("devname", "%s", name); + NET_DUMP_UINT("ifindex", "(%d)", ifindex); + + if (mode == XDP_ATTACHED_MULTI) { + if (json_output) { + jsonw_name(json_wtr, "multi_attachments"); + jsonw_start_array(json_wtr); + } + xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic", true); + xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "driver", true); + xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload", true); + if (json_output) + jsonw_end_array(json_wtr); + } else if (mode == XDP_ATTACHED_DRV) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "driver", false); + } else if (mode == XDP_ATTACHED_SKB) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "generic", false); + } else if (mode == XDP_ATTACHED_HW) { + xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "offload", false); + } + + NET_END_OBJECT_FINAL; + return 0; +} + +int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb) +{ + if (!tb[IFLA_XDP]) + return 0; + + return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index, + nla_getattr_str(tb[IFLA_IFNAME])); +} + +static int do_bpf_dump_one_act(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + + if (nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (!tb[TCA_ACT_BPF_PARMS]) + return -LIBBPF_ERRNO__NLPARSE; + + NET_START_OBJECT_NESTED2; + if (tb[TCA_ACT_BPF_NAME]) + NET_DUMP_STR("name", "%s", + nla_getattr_str(tb[TCA_ACT_BPF_NAME])); + if (tb[TCA_ACT_BPF_ID]) + NET_DUMP_UINT("id", " id %u", + nla_getattr_u32(tb[TCA_ACT_BPF_ID])); + NET_END_OBJECT_NESTED; + return 0; +} + +static int do_dump_one_act(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_MAX + 1]; + + if (!attr) + return 0; + + if (nla_parse_nested(tb, TCA_ACT_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (tb[TCA_ACT_KIND] && strcmp(nla_data(tb[TCA_ACT_KIND]), "bpf") == 0) + return do_bpf_dump_one_act(tb[TCA_ACT_OPTIONS]); + + return 0; +} + +static int do_bpf_act_dump(struct nlattr *attr) +{ + struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; + int act, ret; + + if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + NET_START_ARRAY("act", " %s ["); + for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) { + ret = do_dump_one_act(tb[act]); + if (ret) + break; + } + NET_END_ARRAY("] "); + + return ret; +} + +static int do_bpf_filter_dump(struct nlattr *attr) +{ + struct nlattr *tb[TCA_BPF_MAX + 1]; + int ret; + + if (nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0) + return -LIBBPF_ERRNO__NLPARSE; + + if (tb[TCA_BPF_NAME]) + NET_DUMP_STR("name", " %s", nla_getattr_str(tb[TCA_BPF_NAME])); + if (tb[TCA_BPF_ID]) + NET_DUMP_UINT("id", " id %u", nla_getattr_u32(tb[TCA_BPF_ID])); + if (tb[TCA_BPF_ACT]) { + ret = do_bpf_act_dump(tb[TCA_BPF_ACT]); + if (ret) + return ret; + } + + return 0; +} + +int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind, + const char *devname, int ifindex) +{ + int ret = 0; + + if (tb[TCA_OPTIONS] && strcmp(nla_data(tb[TCA_KIND]), "bpf") == 0) { + NET_START_OBJECT; + if (devname[0] != '\0') + NET_DUMP_STR("devname", "%s", devname); + NET_DUMP_UINT("ifindex", "(%u)", ifindex); + NET_DUMP_STR("kind", " %s", kind); + ret = do_bpf_filter_dump(tb[TCA_OPTIONS]); + NET_END_OBJECT_FINAL; + } + + return ret; +} diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h new file mode 100644 index 000000000000..0788cfbbed0e --- /dev/null +++ b/tools/bpf/bpftool/netlink_dumper.h @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (C) 2018 Facebook + +#ifndef _NETLINK_DUMPER_H_ +#define _NETLINK_DUMPER_H_ + +#define NET_START_OBJECT \ +{ \ + if (json_output) \ + jsonw_start_object(json_wtr); \ +} + +#define NET_START_OBJECT_NESTED(name) \ +{ \ + if (json_output) { \ + jsonw_name(json_wtr, name); \ + jsonw_start_object(json_wtr); \ + } else { \ + fprintf(stderr, "%s {", name); \ + } \ +} + +#define NET_START_OBJECT_NESTED2 \ +{ \ + if (json_output) \ + jsonw_start_object(json_wtr); \ + else \ + fprintf(stderr, "{"); \ +} + +#define NET_END_OBJECT_NESTED \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ + else \ + fprintf(stderr, "}"); \ +} + +#define NET_END_OBJECT \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ +} + +#define NET_END_OBJECT_FINAL \ +{ \ + if (json_output) \ + jsonw_end_object(json_wtr); \ + else \ + fprintf(stderr, "\n"); \ +} + +#define NET_START_ARRAY(name, fmt_str) \ +{ \ + if (json_output) { \ + jsonw_name(json_wtr, name); \ + jsonw_start_array(json_wtr); \ + } else { \ + fprintf(stderr, fmt_str, name); \ + } \ +} + +#define NET_END_ARRAY(endstr) \ +{ \ + if (json_output) \ + jsonw_end_array(json_wtr); \ + else \ + fprintf(stderr, "%s", endstr); \ +} + +#define NET_DUMP_UINT(name, fmt_str, val) \ +{ \ + if (json_output) \ + jsonw_uint_field(json_wtr, name, val); \ + else \ + fprintf(stderr, fmt_str, val); \ +} + +#define NET_DUMP_STR(name, fmt_str, str) \ +{ \ + if (json_output) \ + jsonw_string_field(json_wtr, name, str);\ + else \ + fprintf(stderr, fmt_str, str); \ +} + +#define NET_DUMP_STR_ONLY(str) \ +{ \ + if (json_output) \ + jsonw_string(json_wtr, str); \ + else \ + fprintf(stderr, "%s ", str); \ +} + +#endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index dce960d22106..b1cd3bc8db70 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -74,6 +74,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", }; static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index dbf6e8bd98ba..bbb2a8ef367c 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -286,7 +286,7 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) * Found a match; just move the remaining * entries up. */ - if (i == num_records) { + if (i == (num_records - 1)) { kvp_file_info[pool].num_records--; kvp_update_file(pool); return 0; diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h index 6b0c36a58fcb..e56997288f2b 100644 --- a/tools/include/linux/lockdep.h +++ b/tools/include/linux/lockdep.h @@ -30,9 +30,12 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; gfp_t lockdep_reclaim_gfp; int pid; + int state; char comm[17]; }; +#define TASK_RUNNING 0 + extern struct task_struct *__curr(void); #define current (__curr()) diff --git a/tools/include/linux/nmi.h b/tools/include/linux/nmi.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/include/linux/nmi.h diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h index 664ced8cb1b0..e907ba6f15e5 100644 --- a/tools/include/tools/libc_compat.h +++ b/tools/include/tools/libc_compat.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: (LGPL-2.0+ OR BSD-2-Clause) /* Copyright (C) 2018 Netronome Systems, Inc. */ #ifndef __TOOLS_LIBC_COMPAT_H diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 42990676a55e..df4bedb9b01c 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -734,9 +734,11 @@ __SYSCALL(__NR_pkey_free, sys_pkey_free) __SYSCALL(__NR_statx, sys_statx) #define __NR_io_pgetevents 292 __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) +#define __NR_rseq 293 +__SYSCALL(__NR_rseq, sys_rseq) #undef __NR_syscalls -#define __NR_syscalls 293 +#define __NR_syscalls 294 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 9c660e1688ab..300f336633f2 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -687,6 +687,15 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_ASPECT_RATIO 4 +/** + * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS + * + * If set to 1, the DRM core will expose special connectors to be used for + * writing back to memory the scene setup in the commit. Depends on client + * also supporting DRM_CLIENT_CAP_ATOMIC + */ +#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 + /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 66917a4eba27..aa5ccd2385ed 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -152,6 +152,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_SEG6LOCAL, BPF_PROG_TYPE_LIRC_MODE2, BPF_PROG_TYPE_SK_REUSEPORT, + BPF_PROG_TYPE_FLOW_DISSECTOR, }; enum bpf_attach_type { @@ -172,6 +173,7 @@ enum bpf_attach_type { BPF_CGROUP_UDP4_SENDMSG, BPF_CGROUP_UDP6_SENDMSG, BPF_LIRC_MODE2, + BPF_FLOW_DISSECTOR, __MAX_BPF_ATTACH_TYPE }; @@ -2333,6 +2335,7 @@ struct __sk_buff { /* ... here. */ __u32 data_meta; + struct bpf_flow_keys *flow_keys; }; struct bpf_tunnel_key { @@ -2778,4 +2781,27 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; +struct bpf_flow_keys { + __u16 nhoff; + __u16 thoff; + __u16 addr_proto; /* ETH_P_* of valid addrs */ + __u8 is_frag; + __u8 is_first_frag; + __u8 is_encap; + __u8 ip_proto; + __be16 n_proto; + __be16 sport; + __be16 dport; + union { + struct { + __be32 ipv4_src; + __be32 ipv4_dst; + }; + struct { + __u32 ipv6_src[4]; /* in6_addr; network order */ + __u32 ipv6_dst[4]; /* in6_addr; network order */ + }; + }; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 141cbfdc5865..58faab897201 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -165,6 +165,8 @@ enum { IFLA_CARRIER_UP_COUNT, IFLA_CARRIER_DOWN_COUNT, IFLA_NEW_IFINDEX, + IFLA_MIN_MTU, + IFLA_MAX_MTU, __IFLA_MAX }; @@ -335,6 +337,7 @@ enum { IFLA_BRPORT_GROUP_FWD_MASK, IFLA_BRPORT_NEIGH_SUPPRESS, IFLA_BRPORT_ISOLATED, + IFLA_BRPORT_BACKUP_PORT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -460,6 +463,16 @@ enum { #define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1) +/* XFRM section */ +enum { + IFLA_XFRM_UNSPEC, + IFLA_XFRM_LINK, + IFLA_XFRM_IF_ID, + __IFLA_XFRM_MAX +}; + +#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1) + enum macsec_validation_type { MACSEC_VALIDATE_DISABLED = 0, MACSEC_VALIDATE_CHECK = 1, @@ -922,6 +935,7 @@ enum { XDP_ATTACHED_DRV, XDP_ATTACHED_SKB, XDP_ATTACHED_HW, + XDP_ATTACHED_MULTI, }; enum { @@ -930,6 +944,9 @@ enum { IFLA_XDP_ATTACHED, IFLA_XDP_FLAGS, IFLA_XDP_PROG_ID, + IFLA_XDP_DRV_PROG_ID, + IFLA_XDP_SKB_PROG_ID, + IFLA_XDP_HW_PROG_ID, __IFLA_XDP_MAX, }; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b6270a3b38e9..07548de5c988 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -949,6 +949,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 #define KVM_CAP_HYPERV_TLBFLUSH 155 +#define KVM_CAP_S390_HPAGE_1M 156 +#define KVM_CAP_NESTED_STATE 157 +#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 #ifdef KVM_CAP_IRQ_ROUTING @@ -1391,6 +1394,9 @@ struct kvm_enc_region { /* Available with KVM_CAP_HYPERV_EVENTFD */ #define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) +/* Available with KVM_CAP_NESTED_STATE */ +#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) +#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) /* Secure Encrypted Virtualization command */ enum sev_cmd_id { diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index eeb787b1c53c..f35eb72739c0 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -144,7 +144,7 @@ enum perf_event_sample_format { PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ - __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, + __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; /* diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index c51f8e5cc608..84c3de89696a 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -65,6 +65,7 @@ struct vhost_iotlb_msg { }; #define VHOST_IOTLB_MSG 0x1 +#define VHOST_IOTLB_MSG_V2 0x2 struct vhost_msg { int type; @@ -74,6 +75,15 @@ struct vhost_msg { }; }; +struct vhost_msg_v2 { + __u32 type; + __u32 reserved; + union { + struct vhost_iotlb_msg iotlb; + __u8 padding[64]; + }; +}; + struct vhost_memory_region { __u64 guest_phys_addr; __u64 memory_size; /* bytes */ @@ -160,6 +170,14 @@ struct vhost_memory { #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ struct vhost_vring_state) +/* Set or get vhost backend capability */ + +/* Use message type V2 */ +#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 + +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) + /* VHOST_NET specific defines */ /* Attach virtio net ring to a raw socket, or tap device. diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 439b8a27488d..195ba486640f 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1325,7 +1325,7 @@ class Tui(object): msg = '' while True: self.screen.erase() - self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' % + self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' % DELAY_DEFAULT, curses.A_BOLD) self.screen.addstr(4, 0, msg) self.screen.addstr(2, 0, 'Change delay from %.1fs to ' % diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 13a861135127..7bc31c905018 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 60aa4ca8b2c5..3878a26a2071 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,16 +28,8 @@ #include <linux/bpf.h> #include "bpf.h" #include "libbpf.h" -#include "nlattr.h" -#include <linux/rtnetlink.h> -#include <linux/if_link.h> -#include <sys/socket.h> #include <errno.h> -#ifndef SOL_NETLINK -#define SOL_NETLINK 270 -#endif - /* * When building perf, unistd.h is overridden. __NR_bpf is * required to be defined explicitly. @@ -499,127 +491,6 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); } -int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) -{ - struct sockaddr_nl sa; - int sock, seq = 0, len, ret = -1; - char buf[4096]; - struct nlattr *nla, *nla_xdp; - struct { - struct nlmsghdr nh; - struct ifinfomsg ifinfo; - char attrbuf[64]; - } req; - struct nlmsghdr *nh; - struct nlmsgerr *err; - socklen_t addrlen; - int one = 1; - - memset(&sa, 0, sizeof(sa)); - sa.nl_family = AF_NETLINK; - - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (sock < 0) { - return -errno; - } - - if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, - &one, sizeof(one)) < 0) { - fprintf(stderr, "Netlink error reporting not supported\n"); - } - - if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { - ret = -errno; - goto cleanup; - } - - addrlen = sizeof(sa); - if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { - ret = -errno; - goto cleanup; - } - - if (addrlen != sizeof(sa)) { - ret = -LIBBPF_ERRNO__INTERNAL; - goto cleanup; - } - - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; - req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; - - /* started nested attribute for XDP */ - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | IFLA_XDP; - nla->nla_len = NLA_HDRLEN; - - /* add XDP fd */ - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len += nla_xdp->nla_len; - - /* if user passed in any flags, add those too */ - if (flags) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FLAGS; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); - memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); - nla->nla_len += nla_xdp->nla_len; - } - - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - ret = -errno; - goto cleanup; - } - - len = recv(sock, buf, sizeof(buf), 0); - if (len < 0) { - ret = -errno; - goto cleanup; - } - - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); - nh = NLMSG_NEXT(nh, len)) { - if (nh->nlmsg_pid != sa.nl_pid) { - ret = -LIBBPF_ERRNO__WRNGPID; - goto cleanup; - } - if (nh->nlmsg_seq != seq) { - ret = -LIBBPF_ERRNO__INVSEQ; - goto cleanup; - } - switch (nh->nlmsg_type) { - case NLMSG_ERROR: - err = (struct nlmsgerr *)NLMSG_DATA(nh); - if (!err->error) - continue; - ret = err->error; - nla_dump_errormsg(nh); - goto cleanup; - case NLMSG_DONE: - break; - default: - break; - } - } - - ret = 0; - -cleanup: - close(sock); - return ret; -} - int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2abd0f112627..4f8d43ae20d2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -50,6 +50,7 @@ #include "libbpf.h" #include "bpf.h" #include "btf.h" +#include "str_error.h" #ifndef EM_BPF #define EM_BPF 247 @@ -469,7 +470,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) obj->efile.fd = open(obj->path, O_RDONLY); if (obj->efile.fd < 0) { char errmsg[STRERR_BUFSIZE]; - char *cp = strerror_r(errno, errmsg, sizeof(errmsg)); + char *cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("failed to open %s: %s\n", obj->path, cp); return -errno; @@ -810,8 +811,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) data->d_size, name, idx); if (err) { char errmsg[STRERR_BUFSIZE]; - char *cp = strerror_r(-err, errmsg, - sizeof(errmsg)); + char *cp = str_error(-err, errmsg, sizeof(errmsg)); pr_warning("failed to alloc program %s (%s): %s", name, obj->path, cp); @@ -1140,7 +1140,7 @@ bpf_object__create_maps(struct bpf_object *obj) *pfd = bpf_create_map_xattr(&create_attr); if (*pfd < 0 && create_attr.btf_key_type_id) { - cp = strerror_r(errno, errmsg, sizeof(errmsg)); + cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", map->name, cp, errno); create_attr.btf_fd = 0; @@ -1155,7 +1155,7 @@ bpf_object__create_maps(struct bpf_object *obj) size_t j; err = *pfd; - cp = strerror_r(errno, errmsg, sizeof(errmsg)); + cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("failed to create map (name: '%s'): %s\n", map->name, cp); for (j = 0; j < i; j++) @@ -1339,7 +1339,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, } ret = -LIBBPF_ERRNO__LOAD; - cp = strerror_r(errno, errmsg, sizeof(errmsg)); + cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("load bpf program failed: %s\n", cp); if (log_buf && log_buf[0] != '\0') { @@ -1502,6 +1502,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: return false; case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_KPROBE: @@ -1654,7 +1655,7 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = strerror_r(errno, errmsg, sizeof(errmsg)); + cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("failed to statfs %s: %s\n", dir, cp); err = -errno; } @@ -1690,7 +1691,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, } if (bpf_obj_pin(prog->instances.fds[instance], path)) { - cp = strerror_r(errno, errmsg, sizeof(errmsg)); + cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("failed to pin program: %s\n", cp); return -errno; } @@ -1708,7 +1709,7 @@ static int make_dir(const char *path) err = -errno; if (err) { - cp = strerror_r(-err, errmsg, sizeof(errmsg)); + cp = str_error(-err, errmsg, sizeof(errmsg)); pr_warning("failed to mkdir %s: %s\n", path, cp); } return err; @@ -1770,7 +1771,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } if (bpf_obj_pin(map->fd, path)) { - cp = strerror_r(errno, errmsg, sizeof(errmsg)); + cp = str_error(errno, errmsg, sizeof(errmsg)); pr_warning("failed to pin map: %s\n", cp); return -errno; } @@ -2121,6 +2122,7 @@ static const struct { BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB), BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG), BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2), + BPF_PROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR), BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND), BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND), BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT), @@ -2336,7 +2338,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, bpf_program__set_expected_attach_type(prog, expected_attach_type); - if (!bpf_program__is_function_storage(prog, obj) && !first_prog) + if (!first_prog) first_prog = prog; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 96c55fac54c3..e3b00e23e181 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -46,6 +46,7 @@ enum libbpf_errno { LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */ + LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */ __LIBBPF_ERRNO__END, }; @@ -297,4 +298,19 @@ int bpf_perf_event_read_simple(void *mem, unsigned long size, unsigned long page_size, void **buf, size_t *buf_len, bpf_perf_event_print_t fn, void *priv); + +struct nlmsghdr; +struct nlattr; +typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t, + void *cookie); +int bpf_netlink_open(unsigned int *nl_pid); +int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg, + void *cookie); +int nl_get_class(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_class_nlmsg, void *cookie); +int nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + dump_nlmsg_t dump_filter_nlmsg, void *cookie); #endif diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d9ba851bd7f9..2464ade3b326 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -42,6 +42,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", + [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", }; int libbpf_strerror(int err, char *buf, size_t size) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c new file mode 100644 index 000000000000..fde1d7bf8199 --- /dev/null +++ b/tools/lib/bpf/netlink.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <memory.h> +#include <unistd.h> +#include <linux/bpf.h> +#include <linux/rtnetlink.h> +#include <sys/socket.h> +#include <errno.h> +#include <time.h> + +#include "bpf.h" +#include "libbpf.h" +#include "nlattr.h" + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +int bpf_netlink_open(__u32 *nl_pid) +{ + struct sockaddr_nl sa; + socklen_t addrlen; + int one = 1, ret; + int sock; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) + return -errno; + + if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)) < 0) { + fprintf(stderr, "Netlink error reporting not supported\n"); + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + ret = -errno; + goto cleanup; + } + + addrlen = sizeof(sa); + if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) { + ret = -errno; + goto cleanup; + } + + if (addrlen != sizeof(sa)) { + ret = -LIBBPF_ERRNO__INTERNAL; + goto cleanup; + } + + *nl_pid = sa.nl_pid; + return sock; + +cleanup: + close(sock); + return ret; +} + +static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, + __dump_nlmsg_t _fn, dump_nlmsg_t fn, + void *cookie) +{ + bool multipart = true; + struct nlmsgerr *err; + struct nlmsghdr *nh; + char buf[4096]; + int len, ret; + + while (multipart) { + multipart = false; + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + ret = -errno; + goto done; + } + + if (len == 0) + break; + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != nl_pid) { + ret = -LIBBPF_ERRNO__WRNGPID; + goto done; + } + if (nh->nlmsg_seq != seq) { + ret = -LIBBPF_ERRNO__INVSEQ; + goto done; + } + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + nla_dump_errormsg(nh); + goto done; + case NLMSG_DONE: + return 0; + default: + break; + } + if (_fn) { + ret = _fn(nh, fn, cookie); + if (ret) + return ret; + } + } + } + ret = 0; +done: + return ret; +} + +int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + int sock, seq = 0, ret; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + __u32 nl_pid; + + sock = bpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | IFLA_XDP; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FD; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = IFLA_XDP_FLAGS; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + ret = -errno; + goto cleanup; + } + ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); + +cleanup: + close(sock); + return ret; +} + +static int __dump_link_nlmsg(struct nlmsghdr *nlh, dump_nlmsg_t dump_link_nlmsg, + void *cookie) +{ + struct nlattr *tb[IFLA_MAX + 1], *attr; + struct ifinfomsg *ifi = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); + attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_link_nlmsg(cookie, ifi, tb); +} + +int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg, + void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, + dump_link_nlmsg, cookie); +} + +static int __dump_class_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_class_nlmsg(cookie, t, tb); +} + +int nl_get_class(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_class_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTCLASS, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, + dump_class_nlmsg, cookie); +} + +static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_qdisc_nlmsg(cookie, t, tb); +} + +int nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETQDISC, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, + dump_qdisc_nlmsg, cookie); +} + +static int __dump_filter_nlmsg(struct nlmsghdr *nlh, + dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct nlattr *tb[TCA_MAX + 1], *attr; + struct tcmsg *t = NLMSG_DATA(nlh); + int len; + + len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); + attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); + if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) + return -LIBBPF_ERRNO__NLPARSE; + + return dump_filter_nlmsg(cookie, t, tb); +} + +int nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + dump_nlmsg_t dump_filter_nlmsg, void *cookie) +{ + struct { + struct nlmsghdr nlh; + struct tcmsg t; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), + .nlh.nlmsg_type = RTM_GETTFILTER, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .t.tcm_family = AF_UNSPEC, + .t.tcm_ifindex = ifindex, + .t.tcm_parent = handle, + }; + int seq = time(NULL); + + req.nlh.nlmsg_seq = seq; + if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + return -errno; + + return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, + dump_filter_nlmsg, cookie); +} diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 4719434278b2..49f514119bdb 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -26,11 +26,6 @@ static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_FLAG] = 0, }; -static int nla_len(const struct nlattr *nla) -{ - return nla->nla_len - NLA_HDRLEN; -} - static struct nlattr *nla_next(const struct nlattr *nla, int *remaining) { int totlen = NLA_ALIGN(nla->nla_len); @@ -46,11 +41,6 @@ static int nla_ok(const struct nlattr *nla, int remaining) nla->nla_len <= remaining; } -static void *nla_data(const struct nlattr *nla) -{ - return (char *) nla + NLA_HDRLEN; -} - static int nla_type(const struct nlattr *nla) { return nla->nla_type & NLA_TYPE_MASK; @@ -114,8 +104,8 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh) * @see nla_validate * @return 0 on success or a negative error code. */ -static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - struct nla_policy *policy) +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy) { struct nlattr *nla; int rem, err; @@ -146,6 +136,25 @@ errout: return err; } +/** + * Create attribute index based on nested attribute + * @arg tb Index array to be filled (maxtype+1 elements). + * @arg maxtype Maximum attribute type expected and accepted. + * @arg nla Nested Attribute. + * @arg policy Attribute validation policy. + * + * Feeds the stream of attributes nested into the specified attribute + * to nla_parse(). + * + * @see nla_parse + * @return 0 on success or a negative error code. + */ +int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + struct nla_policy *policy) +{ + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); +} + /* dump netlink extended ack error message */ int nla_dump_errormsg(struct nlmsghdr *nlh) { diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h index 931a71f68f93..a6e2396bce7c 100644 --- a/tools/lib/bpf/nlattr.h +++ b/tools/lib/bpf/nlattr.h @@ -67,6 +67,44 @@ struct nla_policy { nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) +/** + * nla_data - head of payload + * @nla: netlink attribute + */ +static inline void *nla_data(const struct nlattr *nla) +{ + return (char *) nla + NLA_HDRLEN; +} + +static inline uint8_t nla_getattr_u8(const struct nlattr *nla) +{ + return *(uint8_t *)nla_data(nla); +} + +static inline uint32_t nla_getattr_u32(const struct nlattr *nla) +{ + return *(uint32_t *)nla_data(nla); +} + +static inline const char *nla_getattr_str(const struct nlattr *nla) +{ + return (const char *)nla_data(nla); +} + +/** + * nla_len - length of payload + * @nla: netlink attribute + */ +static inline int nla_len(const struct nlattr *nla) +{ + return nla->nla_len - NLA_HDRLEN; +} + +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + struct nla_policy *policy); +int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + struct nla_policy *policy); + int nla_dump_errormsg(struct nlmsghdr *nlh); #endif /* __NLATTR_H */ diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c new file mode 100644 index 000000000000..b8798114a357 --- /dev/null +++ b/tools/lib/bpf/str_error.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: LGPL-2.1 +#undef _GNU_SOURCE +#include <string.h> +#include <stdio.h> +#include "str_error.h" + +/* + * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl + * libc, while checking strerror_r() return to avoid having to check this in + * all places calling it. + */ +char *str_error(int err, char *dst, int len) +{ + int ret = strerror_r(err, dst, len); + if (ret) + snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); + return dst; +} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h new file mode 100644 index 000000000000..355b1db571d1 --- /dev/null +++ b/tools/lib/bpf/str_error.h @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: LGPL-2.1 +#ifndef BPF_STR_ERROR +#define BPF_STR_ERROR + +char *str_error(int err, char *dst, int len); +#endif // BPF_STR_ERROR diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 42261a9b280e..ac841bc5c35b 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -280,7 +280,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt mv $@+ $@ ifdef USE_ASCIIDOCTOR -$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt +$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b manpage -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index b3d1b12a5081..5224ade3d5af 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -777,14 +777,12 @@ endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBBPF - $(call QUIET_INSTALL, lib) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' - $(call QUIET_INSTALL, include/bpf) \ - $(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' - $(call QUIET_INSTALL, lib) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' - $(call QUIET_INSTALL, examples/bpf) \ - $(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' + $(call QUIET_INSTALL, bpf-headers) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ + $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf' + $(call QUIET_INSTALL, bpf-examples) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ + $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' endif $(call QUIET_INSTALL, perf-archive) \ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index f013b115dc86..dbef716a1913 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -11,7 +11,8 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 out := $(OUTPUT)arch/arm64/include/generated/asm header := $(out)/syscalls.c -sysdef := $(srctree)/tools/include/uapi/asm-generic/unistd.h +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/ systbl := $(sysprf)/mksyscalltbl @@ -19,7 +20,7 @@ systbl := $(sysprf)/mksyscalltbl _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sysdef) $(systbl) - $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(sysdef) > $@ + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ clean:: $(call QUIET_CLEAN, arm64) $(RM) $(header) diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl index 52e197317d3e..2dbb8cade048 100755 --- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -11,7 +11,8 @@ gcc=$1 hostcc=$2 -input=$3 +incpath=$3 +input=$4 if ! test -r $input; then echo "Could not read input file" >&2 @@ -28,7 +29,6 @@ create_table_from_c() cat <<-_EoHEADER #include <stdio.h> - #define __ARCH_WANT_RENAMEAT #include "$input" int main(int argc, char *argv[]) { @@ -42,7 +42,7 @@ create_table_from_c() printf "%s\n" " printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);" printf "}\n" - } | $hostcc -o $create_table_exe -x c - + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - $create_table_exe diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 20e7d74d86cd..10a44e946f77 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) #endif -#if !defined(_CALL_ELF) || _CALL_ELF != 2 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { char *sym = syma->name; +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Skip over any initial dot */ if (*sym == '.') sym++; +#endif /* Avoid "SyS" kernel syscall aliases */ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3)) @@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma, return SYMBOL_A; } +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Allow matching against dot variants */ int arch__compare_symbol_names(const char *namea, const char *nameb) { diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index c1bd979b957b..613709cfbbd0 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -9,6 +9,7 @@ struct test; int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); +int test__bp_modify(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 8e2c5a38c3b9..586849ff83a0 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -5,3 +5,4 @@ libperf-y += arch-tests.o libperf-y += rdpmc.o libperf-y += perf-time-to-tsc.o libperf-$(CONFIG_AUXTRACE) += insn-x86.o +libperf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index cc1802ff5410..d47d3f8e3c8e 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -24,6 +24,12 @@ struct test arch_tests[] = { .func = test__insn_x86, }, #endif +#if defined(__x86_64__) + { + .desc = "x86 bp modify", + .func = test__bp_modify, + }, +#endif { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c new file mode 100644 index 000000000000..f53e4406709f --- /dev/null +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/user.h> +#include <syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ptrace.h> +#include <asm/ptrace.h> +#include <errno.h> +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +static noinline int bp_1(void) +{ + pr_debug("in %s\n", __func__); + return 0; +} + +static noinline int bp_2(void) +{ + pr_debug("in %s\n", __func__); + return 0; +} + +static int spawn_child(void) +{ + int child = fork(); + + if (child == 0) { + /* + * The child sets itself for as tracee and + * waits in signal for parent to trace it, + * then it calls bp_1 and quits. + */ + int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + if (err) { + pr_debug("failed to PTRACE_TRACEME\n"); + exit(1); + } + + raise(SIGCONT); + bp_1(); + exit(0); + } + + return child; +} + +/* + * This tests creates HW breakpoint, tries to + * change it and checks it was properly changed. + */ +static int bp_modify1(void) +{ + pid_t child; + int status; + unsigned long rip = 0, dr7 = 1; + + child = spawn_child(); + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 1\n"); + return TEST_FAIL; + } + + /* + * The parent does following steps: + * - creates a new breakpoint (id 0) for bp_2 function + * - changes that breakponit to bp_1 function + * - waits for the breakpoint to hit and checks + * it has proper rip of bp_1 function + * - detaches the child + */ + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_2)) { + pr_debug("failed to set breakpoint, 1st time: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_1)) { + pr_debug("failed to set breakpoint, 2nd time: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[7]), dr7)) { + pr_debug("failed to set dr7: %s\n", strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_CONT, child, NULL, NULL)) { + pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno)); + goto out; + } + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 2\n"); + return TEST_FAIL; + } + + rip = ptrace(PTRACE_PEEKUSER, child, + offsetof(struct user_regs_struct, rip), NULL); + if (rip == (unsigned long) -1) { + pr_debug("failed to PTRACE_PEEKUSER: %s\n", + strerror(errno)); + goto out; + } + + pr_debug("rip %lx, bp_1 %p\n", rip, bp_1); + +out: + if (ptrace(PTRACE_DETACH, child, NULL, NULL)) { + pr_debug("failed to PTRACE_DETACH: %s", strerror(errno)); + return TEST_FAIL; + } + + return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; +} + +/* + * This tests creates HW breakpoint, tries to + * change it to bogus value and checks the original + * breakpoint is hit. + */ +static int bp_modify2(void) +{ + pid_t child; + int status; + unsigned long rip = 0, dr7 = 1; + + child = spawn_child(); + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 1\n"); + return TEST_FAIL; + } + + /* + * The parent does following steps: + * - creates a new breakpoint (id 0) for bp_1 function + * - tries to change that breakpoint to (-1) address + * - waits for the breakpoint to hit and checks + * it has proper rip of bp_1 function + * - detaches the child + */ + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_1)) { + pr_debug("failed to set breakpoint: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[7]), dr7)) { + pr_debug("failed to set dr7: %s\n", strerror(errno)); + goto out; + } + + if (!ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) { + pr_debug("failed, breakpoint set to bogus address\n"); + goto out; + } + + if (ptrace(PTRACE_CONT, child, NULL, NULL)) { + pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno)); + goto out; + } + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 2\n"); + return TEST_FAIL; + } + + rip = ptrace(PTRACE_PEEKUSER, child, + offsetof(struct user_regs_struct, rip), NULL); + if (rip == (unsigned long) -1) { + pr_debug("failed to PTRACE_PEEKUSER: %s\n", + strerror(errno)); + goto out; + } + + pr_debug("rip %lx, bp_1 %p\n", rip, bp_1); + +out: + if (ptrace(PTRACE_DETACH, child, NULL, NULL)) { + pr_debug("failed to PTRACE_DETACH: %s", strerror(errno)); + return TEST_FAIL; + } + + return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; +} + +int test__bp_modify(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1()); + TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2()); + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 20061cf42288..28cd6a17491b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -246,8 +246,14 @@ find_target: indirect_call: tok = strchr(endptr, '*'); - if (tok != NULL) - ops->target.addr = strtoull(tok + 1, NULL, 16); + if (tok != NULL) { + endptr++; + + /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). + * Do not parse such instruction. */ + if (strstr(endptr, "(%r") == NULL) + ops->target.addr = strtoull(endptr, NULL, 16); + } goto find_target; } @@ -276,7 +282,19 @@ bool ins__is_call(const struct ins *ins) return ins->ops == &call_ops || ins->ops == &s390_call_ops; } -static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms) +/* + * Prevents from matching commas in the comment section, e.g.: + * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast + */ +static inline const char *validate_comma(const char *c, struct ins_operands *ops) +{ + if (ops->raw_comment && c > ops->raw_comment) + return NULL; + + return c; +} + +static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -285,6 +303,10 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op }; const char *c = strchr(ops->raw, ','); u64 start, end; + + ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + c = validate_comma(c, ops); + /* * Examples of lines to parse for the _cpp_lex_token@@Base * function: @@ -304,6 +326,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op ops->target.addr = strtoull(c, NULL, 16); if (!ops->target.addr) { c = strchr(c, ','); + c = validate_comma(c, ops); if (c++ != NULL) ops->target.addr = strtoull(c, NULL, 16); } @@ -361,9 +384,12 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name); c = strchr(ops->raw, ','); + c = validate_comma(c, ops); + if (c != NULL) { const char *c2 = strchr(c + 1, ','); + c2 = validate_comma(c2, ops); /* check for 3-op insn */ if (c2 != NULL) c = c2; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 005a5fe8a8c6..5399ba2321bb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -22,6 +22,7 @@ struct ins { struct ins_operands { char *raw; + char *raw_comment; struct { char *raw; char *name; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c980bbff6353..1a61628a1c12 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -251,8 +251,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) { struct perf_evsel *evsel = zalloc(perf_evsel__object.size); - if (evsel != NULL) - perf_evsel__init(evsel, attr, idx); + if (!evsel) + return NULL; + perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 36d0763311ef..6a6929f208b4 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -576,6 +576,13 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg, return NULL; } +static bool map__contains_symbol(struct map *map, struct symbol *sym) +{ + u64 ip = map->unmap_ip(map, sym->start); + + return ip >= map->start && ip < map->end; +} + struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { @@ -591,6 +598,10 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, if (sym == NULL) continue; + if (!map__contains_symbol(pos, sym)) { + sym = NULL; + continue; + } if (mapp != NULL) *mapp = pos; goto out; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index c85d0d1a65ed..7b0ca7cbb7de 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -377,7 +377,7 @@ out: static int record_saved_cmdline(void) { - unsigned int size; + unsigned long long size; char *path; struct stat st; int ret, err = 0; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 920b1d58a068..e76214f8d596 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -164,16 +164,15 @@ void parse_ftrace_printk(struct tep_handle *pevent, void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size __maybe_unused) { - char *comm; + char comm[17]; /* Max comm length in the kernel is 16. */ char *line; char *next = NULL; int pid; line = strtok_r(file, "\n", &next); while (line) { - sscanf(line, "%d %ms", &pid, &comm); - tep_register_comm(pevent, comm, pid); - free(comm); + if (sscanf(line, "%d %16s", &pid, comm) == 2) + tep_register_comm(pevent, comm, pid); line = strtok_r(NULL, "\n", &next); } } diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index 72c25a3cb658..d9a725478375 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -6,7 +6,7 @@ TEST_PROGS := run.sh include ../lib.mk -all: +all: khdr @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/config index b4ad748a9dd9..b4ad748a9dd9 100644 --- a/tools/testing/selftests/android/ion/config +++ b/tools/testing/selftests/android/config diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile index e03695287f76..88cfe88e466f 100644 --- a/tools/testing/selftests/android/ion/Makefile +++ b/tools/testing/selftests/android/ion/Makefile @@ -10,6 +10,8 @@ $(TEST_GEN_FILES): ipcsocket.c ionutils.c TEST_PROGS := ion_test.sh +KSFT_KHDR_INSTALL := 1 +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 49938d72cf63..8a60c9b9892d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -19,3 +19,9 @@ test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user +test_skb_cgroup_id_user +test_socket_cookie +test_cgroup_storage +test_select_reuseport +test_flow_dissector +flow_dissector_load diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fff7fb1285fc..fd3851d5c079 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o + test_skb_cgroup_id_kern.o bpf_flow.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -47,10 +47,12 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_skb_cgroup_id.sh + test_skb_cgroup_id.sh \ + test_flow_dissector.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user +TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ + flow_dissector_load test_flow_dissector include ../lib.mk diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c new file mode 100644 index 000000000000..107350a7821d --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <limits.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/icmp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_packet.h> +#include <sys/socket.h> +#include <linux/if_tunnel.h> +#include <linux/mpls.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +#define PROG(F) SEC(#F) int bpf_func_##F + +/* These are the identifiers of the BPF programs that will be used in tail + * calls. Name is limited to 16 characters, with the terminating character and + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. + */ +enum { + IP, + IPV6, + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ + IPV6FR, /* Fragmentation IPv6 Extension Header */ + MPLS, + VLAN, +}; + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct gre_hdr { + __be16 flags; + __be16 proto; +}; + +struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; +}; + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8 +}; + +static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, + __u16 hdr_size, + void *buffer) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + __u16 nhoff = skb->flow_keys->nhoff; + __u8 *hdr; + + /* Verifies this variable offset does not overflow */ + if (nhoff > (USHRT_MAX - hdr_size)) + return NULL; + + hdr = data + nhoff; + if (hdr + hdr_size <= data_end) + return hdr; + + if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) + return NULL; + + return buffer; +} + +/* Dispatches on ETHERTYPE */ +static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->n_proto = proto; + switch (proto) { + case bpf_htons(ETH_P_IP): + bpf_tail_call(skb, &jmp_table, IP); + break; + case bpf_htons(ETH_P_IPV6): + bpf_tail_call(skb, &jmp_table, IPV6); + break; + case bpf_htons(ETH_P_MPLS_MC): + case bpf_htons(ETH_P_MPLS_UC): + bpf_tail_call(skb, &jmp_table, MPLS); + break; + case bpf_htons(ETH_P_8021Q): + case bpf_htons(ETH_P_8021AD): + bpf_tail_call(skb, &jmp_table, VLAN); + break; + default: + /* Protocol not supported */ + return BPF_DROP; + } + + return BPF_DROP; +} + +SEC("dissect") +int _dissect(struct __sk_buff *skb) +{ + if (!skb->vlan_present) + return parse_eth_proto(skb, skb->protocol); + else + return parse_eth_proto(skb, skb->vlan_proto); +} + +/* Parses on IPPROTO_* */ +static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + void *data_end = (void *)(long)skb->data_end; + struct icmphdr *icmp, _icmp; + struct gre_hdr *gre, _gre; + struct ethhdr *eth, _eth; + struct tcphdr *tcp, _tcp; + struct udphdr *udp, _udp; + + keys->ip_proto = proto; + switch (proto) { + case IPPROTO_ICMP: + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); + if (!icmp) + return BPF_DROP; + return BPF_OK; + case IPPROTO_IPIP: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); + case IPPROTO_IPV6: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); + case IPPROTO_GRE: + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); + if (!gre) + return BPF_DROP; + + if (bpf_htons(gre->flags & GRE_VERSION)) + /* Only inspect standard GRE packets with version 0 */ + return BPF_OK; + + keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + if (GRE_IS_CSUM(gre->flags)) + keys->nhoff += 4; /* Step over chksum and Padding */ + if (GRE_IS_KEY(gre->flags)) + keys->nhoff += 4; /* Step over key */ + if (GRE_IS_SEQ(gre->flags)) + keys->nhoff += 4; /* Step over sequence number */ + + keys->is_encap = true; + + if (gre->proto == bpf_htons(ETH_P_TEB)) { + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), + &_eth); + if (!eth) + return BPF_DROP; + + keys->nhoff += sizeof(*eth); + + return parse_eth_proto(skb, eth->h_proto); + } else { + return parse_eth_proto(skb, gre->proto); + } + case IPPROTO_TCP: + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); + if (!tcp) + return BPF_DROP; + + if (tcp->doff < 5) + return BPF_DROP; + + if ((__u8 *)tcp + (tcp->doff << 2) > data_end) + return BPF_DROP; + + keys->thoff = keys->nhoff; + keys->sport = tcp->source; + keys->dport = tcp->dest; + return BPF_OK; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); + if (!udp) + return BPF_DROP; + + keys->thoff = keys->nhoff; + keys->sport = udp->source; + keys->dport = udp->dest; + return BPF_OK; + default: + return BPF_DROP; + } + + return BPF_DROP; +} + +static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->ip_proto = nexthdr; + switch (nexthdr) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + bpf_tail_call(skb, &jmp_table, IPV6OP); + break; + case IPPROTO_FRAGMENT: + bpf_tail_call(skb, &jmp_table, IPV6FR); + break; + default: + return parse_ip_proto(skb, nexthdr); + } + + return BPF_DROP; +} + +PROG(IP)(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + struct bpf_flow_keys *keys = skb->flow_keys; + void *data = (void *)(long)skb->data; + struct iphdr *iph, _iph; + bool done = false; + + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); + if (!iph) + return BPF_DROP; + + /* IP header cannot be smaller than 20 bytes */ + if (iph->ihl < 5) + return BPF_DROP; + + keys->addr_proto = ETH_P_IP; + keys->ipv4_src = iph->saddr; + keys->ipv4_dst = iph->daddr; + + keys->nhoff += iph->ihl << 2; + if (data + keys->nhoff > data_end) + return BPF_DROP; + + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { + keys->is_frag = true; + if (iph->frag_off & bpf_htons(IP_OFFSET)) + /* From second fragment on, packets do not have headers + * we can parse. + */ + done = true; + else + keys->is_first_frag = true; + } + + if (done) + return BPF_OK; + + return parse_ip_proto(skb, iph->protocol); +} + +PROG(IPV6)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct ipv6hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + keys->addr_proto = ETH_P_IPV6; + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); + + keys->nhoff += sizeof(struct ipv6hdr); + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6OP)(struct __sk_buff *skb) +{ + struct ipv6_opt_hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + /* hlen is in 8-octets and does not include the first 8 bytes + * of the header + */ + skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6FR)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct frag_hdr *fragh, _fragh; + + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); + if (!fragh) + return BPF_DROP; + + keys->nhoff += sizeof(*fragh); + keys->is_frag = true; + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->is_first_frag = true; + + return parse_ipv6_proto(skb, fragh->nexthdr); +} + +PROG(MPLS)(struct __sk_buff *skb) +{ + struct mpls_label *mpls, _mpls; + + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); + if (!mpls) + return BPF_DROP; + + return BPF_OK; +} + +PROG(VLAN)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct vlan_hdr *vlan, _vlan; + __be16 proto; + + /* Peek back to see if single or double-tagging */ + if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, + sizeof(proto))) + return BPF_DROP; + + /* Account for double-tagging */ + if (proto == bpf_htons(ETH_P_8021AD)) { + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + keys->nhoff += sizeof(*vlan); + } + + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + keys->nhoff += sizeof(*vlan); + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index b4994a94968b..3655508f95fd 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -18,3 +18,4 @@ CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m CONFIG_VXLAN=y CONFIG_GENEVE=y +CONFIG_NET_CLS_FLOWER=m diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c new file mode 100644 index 000000000000..d3273b5b3173 --- /dev/null +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; +const char *cfg_map_name = "jmp_table"; +bool cfg_attach = true; +char *cfg_section_name; +char *cfg_path_name; + +static void load_and_attach_program(void) +{ + struct bpf_program *prog, *main_prog; + struct bpf_map *prog_array; + int i, fd, prog_fd, ret; + struct bpf_object *obj; + int prog_array_fd; + + ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj, + &prog_fd); + if (ret) + error(1, 0, "bpf_prog_load %s", cfg_path_name); + + main_prog = bpf_object__find_program_by_title(obj, cfg_section_name); + if (!main_prog) + error(1, 0, "bpf_object__find_program_by_title %s", + cfg_section_name); + + prog_fd = bpf_program__fd(main_prog); + if (prog_fd < 0) + error(1, 0, "bpf_program__fd"); + + prog_array = bpf_object__find_map_by_name(obj, cfg_map_name); + if (!prog_array) + error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name); + + prog_array_fd = bpf_map__fd(prog_array); + if (prog_array_fd < 0) + error(1, 0, "bpf_map__fd %s", cfg_map_name); + + i = 0; + bpf_object__for_each_program(prog, obj) { + fd = bpf_program__fd(prog); + if (fd < 0) + error(1, 0, "bpf_program__fd"); + + if (fd != prog_fd) { + printf("%d: %s\n", i, bpf_program__title(prog, false)); + bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY); + ++i; + } + } + + ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0); + if (ret) + error(1, 0, "bpf_prog_attach %s", cfg_path_name); + + ret = bpf_object__pin(obj, cfg_pin_path); + if (ret) + error(1, 0, "bpf_object__pin %s", cfg_pin_path); + +} + +static void detach_program(void) +{ + char command[64]; + int ret; + + ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (ret) + error(1, 0, "bpf_prog_detach"); + + /* To unpin, it is necessary and sufficient to just remove this dir */ + sprintf(command, "rm -r %s", cfg_pin_path); + ret = system(command); + if (ret) + error(1, errno, command); +} + +static void parse_opts(int argc, char **argv) +{ + bool attach = false; + bool detach = false; + int c; + + while ((c = getopt(argc, argv, "adp:s:")) != -1) { + switch (c) { + case 'a': + if (detach) + error(1, 0, "attach/detach are exclusive"); + attach = true; + break; + case 'd': + if (attach) + error(1, 0, "attach/detach are exclusive"); + detach = true; + break; + case 'p': + if (cfg_path_name) + error(1, 0, "only one prog name can be given"); + + cfg_path_name = optarg; + break; + case 's': + if (cfg_section_name) + error(1, 0, "only one section can be given"); + + cfg_section_name = optarg; + break; + } + } + + if (detach) + cfg_attach = false; + + if (cfg_attach && !cfg_path_name) + error(1, 0, "must provide a path to the BPF program"); + + if (cfg_attach && !cfg_section_name) + error(1, 0, "must provide a section name"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + if (cfg_attach) + load_and_attach_program(); + else + detach_program(); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c new file mode 100644 index 000000000000..12b784afba31 --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Inject packets with all sorts of encapsulation into the kernel. + * + * IPv4/IPv6 outer layer 3 + * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/.. + * IPv4/IPv6 inner layer 3 + */ + +#define _GNU_SOURCE + +#include <stddef.h> +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <error.h> +#include <errno.h> +#include <linux/if_packet.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <netinet/ip.h> +#include <netinet/in.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#define CFG_PORT_INNER 8000 + +/* Add some protocol definitions that do not exist in userspace */ + +struct grehdr { + uint16_t unused; + uint16_t protocol; +} __attribute__((packed)); + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen:5, + control:1, + version:2; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:2, + control:1, + hlen:5; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 proto_ctype; + __be16 flags; + }; + __be32 word; + }; +}; + +static uint8_t cfg_dsfield_inner; +static uint8_t cfg_dsfield_outer; +static uint8_t cfg_encap_proto; +static bool cfg_expect_failure = false; +static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */ +static int cfg_l3_inner = AF_UNSPEC; +static int cfg_l3_outer = AF_UNSPEC; +static int cfg_num_pkt = 10; +static int cfg_num_secs = 0; +static char cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static int cfg_port_gue = 6080; +static bool cfg_only_rx; +static bool cfg_only_tx; +static int cfg_src_port = 9; + +static char buf[ETH_DATA_LEN]; + +#define INIT_ADDR4(name, addr4, port) \ + static struct sockaddr_in name = { \ + .sin_family = AF_INET, \ + .sin_port = __constant_htons(port), \ + .sin_addr.s_addr = __constant_htonl(addr4), \ + }; + +#define INIT_ADDR6(name, addr6, port) \ + static struct sockaddr_in6 name = { \ + .sin6_family = AF_INET6, \ + .sin6_port = __constant_htons(port), \ + .sin6_addr = addr6, \ + }; + +INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER) +INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0) +INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0) +INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0) + +INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) +INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0) + +static unsigned long util_gettime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void util_printaddr(const char *msg, struct sockaddr *addr) +{ + unsigned long off = 0; + char nbuf[INET6_ADDRSTRLEN]; + + switch (addr->sa_family) { + case PF_INET: + off = __builtin_offsetof(struct sockaddr_in, sin_addr); + break; + case PF_INET6: + off = __builtin_offsetof(struct sockaddr_in6, sin6_addr); + break; + default: + error(1, 0, "printaddr: unsupported family %u\n", + addr->sa_family); + } + + if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf, + sizeof(nbuf))) + error(1, errno, "inet_ntop"); + + fprintf(stderr, "%s: %s\n", msg, nbuf); +} + +static unsigned long add_csum_hword(const uint16_t *start, int num_u16) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_u16; i++) + sum += start[i]; + + return sum; +} + +static uint16_t build_ip_csum(const uint16_t *start, int num_u16, + unsigned long sum) +{ + sum += add_csum_hword(start, num_u16); + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +static void build_ipv4_header(void *header, uint8_t proto, + uint32_t src, uint32_t dst, + int payload_len, uint8_t tos) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->tos = tos; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(1337); + iph->protocol = proto; + iph->saddr = src; + iph->daddr = dst; + iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); +} + +static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t) dsfield) << 4; + *ptr = htons(val); +} + +static void build_ipv6_header(void *header, uint8_t proto, + struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + int payload_len, uint8_t dsfield) +{ + struct ipv6hdr *ip6h = header; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 8; + ipv6_set_dsfield(ip6h, dsfield); + + memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); +} + +static uint16_t build_udp_v4_csum(const struct iphdr *iph, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16); + pseudo_sum += htons(IPPROTO_UDP); + pseudo_sum += udph->len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16); + pseudo_sum += htons(ip6h->nexthdr); + pseudo_sum += ip6h->payload_len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static void build_udp_header(void *header, int payload_len, + uint16_t dport, int family) +{ + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(cfg_src_port); + udph->dest = htons(dport); + udph->len = htons(len); + udph->check = 0; + if (family == AF_INET) + udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), + udph, len >> 1); + else + udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), + udph, len >> 1); +} + +static void build_gue_header(void *header, uint8_t proto) +{ + struct guehdr *gueh = header; + + gueh->proto_ctype = proto; +} + +static void build_gre_header(void *header, uint16_t proto) +{ + struct grehdr *greh = header; + + greh->protocol = htons(proto); +} + +static int l3_length(int family) +{ + if (family == AF_INET) + return sizeof(struct iphdr); + else + return sizeof(struct ipv6hdr); +} + +static int build_packet(void) +{ + int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; + int el3_len = 0; + + if (cfg_l3_extra) + el3_len = l3_length(cfg_l3_extra); + + /* calculate header offsets */ + if (cfg_encap_proto) { + ol3_len = l3_length(cfg_l3_outer); + + if (cfg_encap_proto == IPPROTO_GRE) + ol4_len = sizeof(struct grehdr); + else if (cfg_encap_proto == IPPROTO_UDP) + ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); + } + + il3_len = l3_length(cfg_l3_inner); + il4_len = sizeof(struct udphdr); + + if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >= + sizeof(buf)) + error(1, 0, "packet too large\n"); + + /* + * Fill packet from inside out, to calculate correct checksums. + * But create ip before udp headers, as udp uses ip for pseudo-sum. + */ + memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, + cfg_payload_char, cfg_payload_len); + + /* add zero byte for udp csum padding */ + buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0; + + switch (cfg_l3_inner) { + case PF_INET: + build_ipv4_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + in_saddr4.sin_addr.s_addr, + in_daddr4.sin_addr.s_addr, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + &in_saddr6, &in_daddr6, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + } + + build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, + cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner); + + if (!cfg_encap_proto) + return il3_len + il4_len + cfg_payload_len; + + switch (cfg_l3_outer) { + case PF_INET: + build_ipv4_header(buf + el3_len, cfg_encap_proto, + out_saddr4.sin_addr.s_addr, + out_daddr4.sin_addr.s_addr, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len, cfg_encap_proto, + &out_saddr6, &out_daddr6, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + } + + switch (cfg_encap_proto) { + case IPPROTO_UDP: + build_gue_header(buf + el3_len + ol3_len + ol4_len - + sizeof(struct guehdr), + cfg_l3_inner == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6); + build_udp_header(buf + el3_len + ol3_len, + sizeof(struct guehdr) + il3_len + il4_len + + cfg_payload_len, + cfg_port_gue, cfg_l3_outer); + break; + case IPPROTO_GRE: + build_gre_header(buf + el3_len + ol3_len, + cfg_l3_inner == PF_INET ? ETH_P_IP + : ETH_P_IPV6); + break; + } + + switch (cfg_l3_extra) { + case PF_INET: + build_ipv4_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + extra_saddr4.sin_addr.s_addr, + extra_daddr4.sin_addr.s_addr, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + case PF_INET6: + build_ipv6_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + &extra_saddr6, &extra_daddr6, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + } + + return el3_len + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len; +} + +/* sender transmits encapsulated over RAW or unencap'd over UDP */ +static int setup_tx(void) +{ + int family, fd, ret; + + if (cfg_l3_extra) + family = cfg_l3_extra; + else if (cfg_l3_outer) + family = cfg_l3_outer; + else + family = cfg_l3_inner; + + fd = socket(family, SOCK_RAW, IPPROTO_RAW); + if (fd == -1) + error(1, errno, "socket tx"); + + if (cfg_l3_extra) { + if (cfg_l3_extra == PF_INET) + ret = connect(fd, (void *) &extra_daddr4, + sizeof(extra_daddr4)); + else + ret = connect(fd, (void *) &extra_daddr6, + sizeof(extra_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else if (cfg_l3_outer) { + /* connect to destination if not encapsulated */ + if (cfg_l3_outer == PF_INET) + ret = connect(fd, (void *) &out_daddr4, + sizeof(out_daddr4)); + else + ret = connect(fd, (void *) &out_daddr6, + sizeof(out_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else { + /* otherwise using loopback */ + if (cfg_l3_inner == PF_INET) + ret = connect(fd, (void *) &in_daddr4, + sizeof(in_daddr4)); + else + ret = connect(fd, (void *) &in_daddr6, + sizeof(in_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } + + return fd; +} + +/* receiver reads unencapsulated UDP */ +static int setup_rx(void) +{ + int fd, ret; + + fd = socket(cfg_l3_inner, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (cfg_l3_inner == PF_INET) + ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4)); + else + ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6)); + if (ret) + error(1, errno, "bind rx"); + + return fd; +} + +static int do_tx(int fd, const char *pkt, int len) +{ + int ret; + + ret = write(fd, pkt, len); + if (ret == -1) + error(1, errno, "send"); + if (ret != len) + error(1, errno, "send: len (%d < %d)\n", ret, len); + + return 1; +} + +static int do_poll(int fd, short events, int timeout) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = events; + + ret = poll(&pfd, 1, timeout); + if (ret == -1) + error(1, errno, "poll"); + if (ret && !(pfd.revents & POLLIN)) + error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents); + + return ret; +} + +static int do_rx(int fd) +{ + char rbuf; + int ret, num = 0; + + while (1) { + ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv"); + if (rbuf != cfg_payload_char) + error(1, 0, "recv: payload mismatch"); + num++; + }; + + return num; +} + +static int do_main(void) +{ + unsigned long tstop, treport, tcur; + int fdt = -1, fdr = -1, len, tx = 0, rx = 0; + + if (!cfg_only_tx) + fdr = setup_rx(); + if (!cfg_only_rx) + fdt = setup_tx(); + + len = build_packet(); + + tcur = util_gettime(); + treport = tcur + 1000; + tstop = tcur + (cfg_num_secs * 1000); + + while (1) { + if (!cfg_only_rx) + tx += do_tx(fdt, buf, len); + + if (!cfg_only_tx) + rx += do_rx(fdr); + + if (cfg_num_secs) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + if (tcur >= treport) { + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + tx = 0; + rx = 0; + treport = tcur + 1000; + } + } else { + if (tx == cfg_num_pkt) + break; + } + } + + /* read straggler packets, if any */ + if (rx < tx) { + tstop = util_gettime() + 100; + while (rx < tx) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + + do_poll(fdr, POLLIN, tstop - tcur); + rx += do_rx(fdr); + } + } + + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + + if (fdr != -1 && close(fdr)) + error(1, errno, "close rx"); + if (fdt != -1 && close(fdt)) + error(1, errno, "close tx"); + + /* + * success (== 0) only if received all packets + * unless failure is expected, in which case none must arrive. + */ + if (cfg_expect_failure) + return rx != 0; + else + return rx != tx; +} + + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] " + "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] " + "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] " + "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n", + filepath); + exit(1); +} + +static void parse_addr(int family, void *addr, const char *optarg) +{ + int ret; + + ret = inet_pton(family, optarg, addr); + if (ret == -1) + error(1, errno, "inet_pton"); + if (ret == 0) + error(1, 0, "inet_pton: bad string"); +} + +static void parse_addr4(struct sockaddr_in *addr, const char *optarg) +{ + parse_addr(AF_INET, &addr->sin_addr, optarg); +} + +static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg) +{ + parse_addr(AF_INET6, &addr->sin6_addr, optarg); +} + +static int parse_protocol_family(const char *filepath, const char *optarg) +{ + if (!strcmp(optarg, "4")) + return PF_INET; + if (!strcmp(optarg, "6")) + return PF_INET6; + + usage(filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) { + switch (c) { + case 'd': + if (cfg_l3_outer == AF_UNSPEC) + error(1, 0, "-d must be preceded by -o"); + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_daddr4, optarg); + else + parse_addr6(&out_daddr6, optarg); + break; + case 'D': + if (cfg_l3_inner == AF_UNSPEC) + error(1, 0, "-D must be preceded by -i"); + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_daddr4, optarg); + else + parse_addr6(&in_daddr6, optarg); + break; + case 'e': + if (!strcmp(optarg, "gre")) + cfg_encap_proto = IPPROTO_GRE; + else if (!strcmp(optarg, "gue")) + cfg_encap_proto = IPPROTO_UDP; + else if (!strcmp(optarg, "bare")) + cfg_encap_proto = IPPROTO_IPIP; + else if (!strcmp(optarg, "none")) + cfg_encap_proto = IPPROTO_IP; /* == 0 */ + else + usage(argv[0]); + break; + case 'f': + cfg_src_port = strtol(optarg, NULL, 0); + break; + case 'F': + cfg_expect_failure = true; + break; + case 'h': + usage(argv[0]); + break; + case 'i': + if (!strcmp(optarg, "4")) + cfg_l3_inner = PF_INET; + else if (!strcmp(optarg, "6")) + cfg_l3_inner = PF_INET6; + else + usage(argv[0]); + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'o': + cfg_l3_outer = parse_protocol_family(argv[0], optarg); + break; + case 'O': + cfg_l3_extra = parse_protocol_family(argv[0], optarg); + break; + case 'R': + cfg_only_rx = true; + break; + case 's': + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_saddr4, optarg); + else + parse_addr6(&out_saddr6, optarg); + break; + case 'S': + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_saddr4, optarg); + else + parse_addr6(&in_saddr6, optarg); + break; + case 't': + cfg_num_secs = strtol(optarg, NULL, 0); + break; + case 'T': + cfg_only_tx = true; + break; + case 'x': + cfg_dsfield_outer = strtol(optarg, NULL, 0); + break; + case 'X': + cfg_dsfield_inner = strtol(optarg, NULL, 0); + break; + } + } + + if (cfg_only_rx && cfg_only_tx) + error(1, 0, "options: cannot combine rx-only and tx-only"); + + if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC) + error(1, 0, "options: must specify outer with encap"); + else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and outer"); + else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and extra"); + + if (cfg_l3_inner == AF_UNSPEC) + cfg_l3_inner = AF_INET6; + if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP) + cfg_encap_proto = IPPROTO_IPV6; + + /* RFC 6040 4.2: + * on decap, if outer encountered congestion (CE == 0x3), + * but inner cannot encode ECN (NoECT == 0x0), then drop packet. + */ + if (((cfg_dsfield_outer & 0x3) == 0x3) && + ((cfg_dsfield_inner & 0x3) == 0x0)) + cfg_expect_failure = true; +} + +static void print_opts(void) +{ + if (cfg_l3_inner == PF_INET6) { + util_printaddr("inner.dest6", (void *) &in_daddr6); + util_printaddr("inner.source6", (void *) &in_saddr6); + } else { + util_printaddr("inner.dest4", (void *) &in_daddr4); + util_printaddr("inner.source4", (void *) &in_saddr4); + } + + if (!cfg_l3_outer) + return; + + fprintf(stderr, "encap proto: %u\n", cfg_encap_proto); + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("outer.dest6", (void *) &out_daddr6); + util_printaddr("outer.source6", (void *) &out_saddr6); + } else { + util_printaddr("outer.dest4", (void *) &out_daddr4); + util_printaddr("outer.source4", (void *) &out_saddr4); + } + + if (!cfg_l3_extra) + return; + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("extra.dest6", (void *) &extra_daddr6); + util_printaddr("extra.source6", (void *) &extra_saddr6); + } else { + util_printaddr("extra.dest4", (void *) &extra_daddr4); + util_printaddr("extra.source4", (void *) &extra_saddr4); + } + +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + print_opts(); + return do_main(); +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh new file mode 100755 index 000000000000..c0fb073b5eab --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Load BPF flow dissector and verify it correctly dissects traffic +export TESTNAME=test_flow_dissector +unmount=0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + +# This test needs to be run in a network namespace with in_netns.sh. Check if +# this is the case and run it with in_netns.sh if it is being run in the root +# namespace. +if [[ -z $(ip netns identify $$) ]]; then + ../net/in_netns.sh "$0" "$@" + exit $? +fi + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "selftests: $TESTNAME [FAILED]"; + fi + + set +e + + # Cleanup + tc filter del dev lo ingress pref 1337 2> /dev/null + tc qdisc del dev lo ingress 2> /dev/null + ./flow_dissector_load -d 2> /dev/null + if [ $unmount -ne 0 ]; then + umount bpffs 2> /dev/null + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +# Mount BPF file system +if /bin/mount | grep /sys/fs/bpf > /dev/null; then + echo "bpffs already mounted" +else + echo "bpffs not mounted. Mounting..." + unmount=1 + /bin/mount bpffs /sys/fs/bpf -t bpf +fi + +# Attach BPF program +./flow_dissector_load -p bpf_flow.o -s dissect + +# Setup +tc qdisc add dev lo ingress + +echo "Testing IPv4..." +# Drops all IP/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 4 -f 8 +# Send 10 IPv4/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 4 -f 9 -F +# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 4 -f 10 + +echo "Testing IPIP..." +# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +echo "Testing IPv4 + GRE..." +# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +tc filter del dev lo ingress pref 1337 + +echo "Testing IPv6..." +# Drops all IPv6/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv6/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 6 -f 8 +# Send 10 IPv6/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 6 -f 9 -F +# Send 10 IPv6/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 6 -f 10 + +exit 0 diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 6f54f84144a0..9b552c0fc47d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -580,7 +580,11 @@ static void test_sockmap(int tasks, void *data) /* Test update without programs */ for (i = 0; i < 6; i++) { err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); - if (err) { + if (i < 2 && !err) { + printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n", + i, sfd[i]); + goto out_sockmap; + } else if (i >= 2 && err) { printf("Failed noprog update sockmap '%i:%i'\n", i, sfd[i]); goto out_sockmap; @@ -741,7 +745,7 @@ static void test_sockmap(int tasks, void *data) } /* Test map update elem afterwards fd lives in fd and map_fd */ - for (i = 0; i < 6; i++) { + for (i = 2; i < 6; i++) { err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY); if (err) { printf("Failed map_fd_rx update sockmap %i '%i:%i'\n", @@ -845,7 +849,7 @@ static void test_sockmap(int tasks, void *data) } /* Delete the elems without programs */ - for (i = 0; i < 6; i++) { + for (i = 2; i < 6; i++) { err = bpf_map_delete_elem(fd, &i); if (err) { printf("Failed delete sockmap %i '%i:%i'\n", diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0ef68204c84b..63a671803ed6 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -112,13 +112,13 @@ static void test_pkt_access(void) err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv4", + CHECK(err || retval, "ipv4", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv6", + CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); bpf_object__close(obj); @@ -153,14 +153,14 @@ static void test_xdp(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 74 || + CHECK(err || retval != XDP_TX || size != 74 || iph->protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 114 || + CHECK(err || retval != XDP_TX || size != 114 || iph6->nexthdr != IPPROTO_IPV6, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); @@ -185,13 +185,13 @@ static void test_xdp_adjust_tail(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_DROP, + CHECK(err || retval != XDP_DROP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 54, + CHECK(err || retval != XDP_TX || size != 54, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); bpf_object__close(obj); @@ -254,14 +254,14 @@ static void test_l4lb(const char *file) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); @@ -343,14 +343,14 @@ static void test_xdp_noinline(void) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 54 || + CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 74 || + CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh new file mode 100755 index 000000000000..ffcd3953f94c --- /dev/null +++ b/tools/testing/selftests/bpf/with_addr.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# add private ipv4 and ipv6 addresses to loopback + +readonly V6_INNER='100::a/128' +readonly V4_INNER='192.168.0.1/32' + +if getopts ":s" opt; then + readonly SIT_DEV_NAME='sixtofourtest0' + readonly V6_SIT='2::/64' + readonly V4_SIT='172.17.0.1/32' + shift +fi + +fail() { + echo "error: $*" 1>&2 + exit 1 +} + +setup() { + ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address' + ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address' + + if [[ -n "${V6_SIT}" ]]; then + ip link add "${SIT_DEV_NAME}" type sit remote any local any \ + || fail 'failed to add sit' + ip link set dev "${SIT_DEV_NAME}" up \ + || fail 'failed to bring sit device up' + ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v6 SIT address' + ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v4 SIT address' + fi + + sleep 2 # avoid race causing bind to fail +} + +cleanup() { + if [[ -n "${V6_SIT}" ]]; then + ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}" + ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}" + ip link del "${SIT_DEV_NAME}" + fi + + ip -4 addr del "${V4_INNER}" dev lo + ip -6 addr del "${V6_INNER}" dev lo +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh new file mode 100755 index 000000000000..e24949ed3a20 --- /dev/null +++ b/tools/testing/selftests/bpf/with_tunnels.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# setup tunnels for flow dissection test + +readonly SUFFIX="test_$(mktemp -u XXXX)" +CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo" + +setup() { + ip link add "ipip_${SUFFIX}" type ipip ${CONFIG} + ip link add "gre_${SUFFIX}" type gre ${CONFIG} + ip link add "sit_${SUFFIX}" type sit ${CONFIG} + + echo "tunnels before test:" + ip tunnel show + + ip link set "ipip_${SUFFIX}" up + ip link set "gre_${SUFFIX}" up + ip link set "sit_${SUFFIX}" up +} + + +cleanup() { + ip tunnel del "ipip_${SUFFIX}" + ip tunnel del "gre_${SUFFIX}" + ip tunnel del "sit_${SUFFIX}" + + echo "tunnels after test:" + ip tunnel show +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 95eb3a53c381..adacda50a4b2 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -1 +1,2 @@ test_memcontrol +test_core diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 1c5d2b2a583b..14c9fe284806 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -89,17 +89,28 @@ int cg_read(const char *cgroup, const char *control, char *buf, size_t len) int cg_read_strcmp(const char *cgroup, const char *control, const char *expected) { - size_t size = strlen(expected) + 1; + size_t size; char *buf; + int ret; + + /* Handle the case of comparing against empty string */ + if (!expected) + size = 32; + else + size = strlen(expected) + 1; buf = malloc(size); if (!buf) return -1; - if (cg_read(cgroup, control, buf, size)) + if (cg_read(cgroup, control, buf, size)) { + free(buf); return -1; + } - return strcmp(expected, buf); + ret = strcmp(expected, buf); + free(buf); + return ret; } int cg_read_strstr(const char *cgroup, const char *control, const char *needle) @@ -337,3 +348,24 @@ int is_swap_enabled(void) return cnt > 1; } + +int set_oom_adj_score(int pid, int score) +{ + char path[PATH_MAX]; + int fd, len; + + sprintf(path, "/proc/%d/oom_score_adj", pid); + + fd = open(path, O_WRONLY | O_APPEND); + if (fd < 0) + return fd; + + len = dprintf(fd, "%d", score); + if (len < 0) { + close(fd); + return len; + } + + close(fd); + return 0; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 1ff6f9f1abdc..9ac8b7958f83 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -40,3 +40,4 @@ extern int get_temp_fd(void); extern int alloc_pagecache(int fd, size_t size); extern int alloc_anon(const char *cgroup, void *arg); extern int is_swap_enabled(void); +extern int set_oom_adj_score(int pid, int score); diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index cf0bddc9d271..28d321ba311b 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -2,6 +2,7 @@ #define _GNU_SOURCE #include <linux/limits.h> +#include <linux/oom.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -202,6 +203,36 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) return 0; } +static int alloc_anon_noexit(const char *cgroup, void *arg) +{ + int ppid = getppid(); + + if (alloc_anon(cgroup, arg)) + return -1; + + while (getppid() == ppid) + sleep(1); + + return 0; +} + +/* + * Wait until processes are killed asynchronously by the OOM killer + * If we exceed a timeout, fail. + */ +static int cg_test_proc_killed(const char *cgroup) +{ + int limit; + + for (limit = 10; limit > 0; limit--) { + if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0) + return 0; + + usleep(100000); + } + return -1; +} + /* * First, this test creates the following hierarchy: * A memory.min = 50M, memory.max = 200M @@ -964,6 +995,177 @@ cleanup: return ret; } +/* + * This test disables swapping and tries to allocate anonymous memory + * up to OOM with memory.group.oom set. Then it checks that all + * processes in the leaf (but not the parent) were killed. + */ +static int test_memcg_oom_group_leaf_events(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child; + + parent = cg_name(root, "memcg_test_0"); + child = cg_name(root, "memcg_test_0/memcg_test_1"); + + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_write(child, "memory.max", "50M")) + goto cleanup; + + if (cg_write(child, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(child, "memory.oom.group", "1")) + goto cleanup; + + cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); + cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); + cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); + if (!cg_run(child, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_test_proc_killed(child)) + goto cleanup; + + if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0) + goto cleanup; + + if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + if (parent) + cg_destroy(parent); + free(child); + free(parent); + + return ret; +} + +/* + * This test disables swapping and tries to allocate anonymous memory + * up to OOM with memory.group.oom set. Then it checks that all + * processes in the parent and leaf were killed. + */ +static int test_memcg_oom_group_parent_events(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child; + + parent = cg_name(root, "memcg_test_0"); + child = cg_name(root, "memcg_test_0/memcg_test_1"); + + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_write(parent, "memory.max", "80M")) + goto cleanup; + + if (cg_write(parent, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(parent, "memory.oom.group", "1")) + goto cleanup; + + cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60)); + cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); + cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1)); + + if (!cg_run(child, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_test_proc_killed(child)) + goto cleanup; + if (cg_test_proc_killed(parent)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + if (parent) + cg_destroy(parent); + free(child); + free(parent); + + return ret; +} + +/* + * This test disables swapping and tries to allocate anonymous memory + * up to OOM with memory.group.oom set. Then it checks that all + * processes were killed except those set with OOM_SCORE_ADJ_MIN + */ +static int test_memcg_oom_group_score_events(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + int safe_pid; + + memcg = cg_name(root, "memcg_test_0"); + + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_write(memcg, "memory.max", "50M")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(memcg, "memory.oom.group", "1")) + goto cleanup; + + safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); + if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN)) + goto cleanup; + + cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1)); + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3) + goto cleanup; + + if (kill(safe_pid, SIGKILL)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (memcg) + cg_destroy(memcg); + free(memcg); + + return ret; +} + + #define T(x) { x, #x } struct memcg_test { int (*fn)(const char *root); @@ -978,6 +1180,9 @@ struct memcg_test { T(test_memcg_oom_events), T(test_memcg_swap_max), T(test_memcg_sock), + T(test_memcg_oom_group_leaf_events), + T(test_memcg_oom_group_parent_events), + T(test_memcg_oom_group_score_events), }; #undef T diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh new file mode 100644 index 000000000000..0150bb2741eb --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for switch behavior under MC overload. An issue in Spectrum chips +# causes throughput of UC traffic to drop severely when a switch is under heavy +# MC load. This issue can be overcome by putting the switch to MC-aware mode. +# This test verifies that UC performance stays intact even as the switch is +# under MC flood, and therefore that the MC-aware mode is enabled and correctly +# configured. +# +# Because mlxsw throttles CPU port, the traffic can't actually reach userspace +# at full speed. That makes it impossible to use iperf3 to simply measure the +# throughput, because many packets (that reach $h3) don't get to the kernel at +# all even in UDP mode (the situation is even worse in TCP mode, where one can't +# hope to see more than a couple Mbps). +# +# So instead we send traffic with mausezahn and use RX ethtool counters at $h3. +# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore +# each gets a different priority and we can use per-prio ethtool counters to +# measure the throughput. In order to avoid prioritizing unicast traffic, prio +# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and +# thus TC 0). +# +# Mausezahn can't actually saturate the links unless it's using large frames. +# Thus we set MTU to 10K on all involved interfaces. Then both unicast and +# multicast traffic uses 8K frames. +# +# +-----------------------+ +----------------------------------+ +# | H1 | | H2 | +# | | | unicast --> + $h2.111 | +# | | | traffic | 192.0.2.129/28 | +# | multicast | | | e-qos-map 0:1 | +# | traffic | | | | +# | $h1 + <----- | | + $h2 | +# +-----|-----------------+ +--------------|-------------------+ +# | | +# +-----|-------------------------------------------------|-------------------+ +# | + $swp1 + $swp2 | +# | | >1Gbps | >1Gbps | +# | +---|----------------+ +----------|----------------+ | +# | | + $swp1.1 | | + $swp2.111 | | +# | | BR1 | SW | BR111 | | +# | | + $swp3.1 | | + $swp3.111 | | +# | +---|----------------+ +----------|----------------+ | +# | \_________________________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | prio qdisc: {0..7} -> 7 | +# +------------------------------------|--------------------------------------+ +# | +# +--|-----------------+ +# | + $h3 H3 | +# | | | +# | + $h3.111 | +# | 192.0.2.130/28 | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + test_mc_aware +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 +} + +h1_destroy() +{ + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.129/28 + ip link set dev $h2.111 type vlan egress-qos-map 0:1 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.130/28 +} + +h3_destroy() +{ + vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + mtu_set $swp1 10000 + + ip link set dev $swp2 up + mtu_set $swp2 10000 + + ip link set dev $swp3 up + mtu_set $swp3 10000 + + vlan_create $swp2 111 + vlan_create $swp3 111 + + ethtool -s $swp3 speed 1000 autoneg off + tc qdisc replace dev $swp3 root handle 3: \ + prio bands 8 priomap 7 7 7 7 7 7 7 7 + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 up + ip link set dev $swp1 master br1 + ip link set dev $swp3 master br1 + + ip link add name br111 type bridge vlan_filtering 0 + ip link set dev br111 up + ip link set dev $swp2.111 master br111 + ip link set dev $swp3.111 master br111 +} + +switch_destroy() +{ + ip link del dev br111 + ip link del dev br1 + + tc qdisc del dev $swp3 root handle 3: + ethtool -s $swp3 autoneg on + + vlan_destroy $swp3 111 + vlan_destroy $swp2 111 + + mtu_restore $swp3 + ip link set dev $swp3 down + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h2 192.0.2.130 +} + +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + +check_rate() +{ + local rate=$1; shift + local min=$1; shift + local what=$1; shift + + if ((rate > min)); then + return 0 + fi + + echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr + return 1 +} + +measure_uc_rate() +{ + local what=$1; shift + + local interval=10 + local i + local ret=0 + + # Dips in performance might cause momentary ingress rate to drop below + # 1Gbps. That wouldn't saturate egress and MC would thus get through, + # seemingly winning bandwidth on account of UC. Demand at least 2Gbps + # average ingress rate to somewhat mitigate this. + local min_ingress=2147483648 + + mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + -a own -b $h3mac -t udp -q & + sleep 1 + + for i in {5..0}; do + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep $interval + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local ir=$(rate $u0 $u1 $interval) + local er=$(rate $t0 $t1 $interval) + + if check_rate $ir $min_ingress "$what ingress rate"; then + break + fi + + # Fail the test if we can't get the throughput. + if ((i == 0)); then + ret=1 + fi + done + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + echo $ir $er + exit $ret +} + +test_mc_aware() +{ + RET=0 + + local -a uc_rate + uc_rate=($(measure_uc_rate "UC-only")) + check_err $? "Could not get high enough UC-only ingress rate" + local ucth1=${uc_rate[1]} + + mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local -a uc_rate_2 + uc_rate_2=($(measure_uc_rate "UC+MC")) + check_err $? "Could not get high enough UC+MC ingress rate" + local ucth2=${uc_rate_2[1]} + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local deg=$(bc <<< " + scale=2 + ret = 100 * ($ucth1 - $ucth2) / $ucth1 + if (ret > 0) { ret } else { 0 } + ") + check_err $(bc <<< "$deg > 10") + + local interval=$((d1 - d0)) + local mc_ir=$(rate $u0 $u1 $interval) + local mc_er=$(rate $t0 $t1 $interval) + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + log_test "UC performace under MC overload" + + echo "UC-only throughput $(humanize $ucth1)" + echo "UC+MC throughput $(humanize $ucth2)" + echo "Degradation $deg %" + echo + echo "Full report:" + echo " UC only:" + echo " ingress UC throughput $(humanize ${uc_rate[0]})" + echo " egress UC throughput $(humanize ${uc_rate[1]})" + echo " UC+MC:" + echo " ingress UC throughput $(humanize ${uc_rate_2[0]})" + echo " egress UC throughput $(humanize ${uc_rate_2[1]})" + echo " ingress MC throughput $(humanize $mc_ir)" + echo " egress MC throughput $(humanize $mc_er)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config new file mode 100644 index 000000000000..4e151f1005b2 --- /dev/null +++ b/tools/testing/selftests/efivarfs/config @@ -0,0 +1 @@ +CONFIG_EFIVAR_FS=y diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index ff8feca49746..ad1eeb14fda7 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -18,6 +18,7 @@ TEST_GEN_FILES := \ TEST_PROGS := run.sh +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_FILES): $(HEADERS) diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 1bbb47565c55..4665cdbf1a8d 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -21,11 +21,8 @@ endef CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ LDLIBS += -lmount -I/usr/include/libmount -$(BINARIES): ../../../gpio/gpio-utils.o ../../../../usr/include/linux/gpio.h +$(BINARIES):| khdr +$(BINARIES): ../../../gpio/gpio-utils.o ../../../gpio/gpio-utils.o: make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio - -../../../../usr/include/linux/gpio.h: - make -C ../../../.. headers_install INSTALL_HDR_PATH=$(shell pwd)/../../../../usr/ - diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 15e6b75fc3a5..a3edb2c8e43d 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -19,7 +19,6 @@ #define KSFT_FAIL 1 #define KSFT_XFAIL 2 #define KSFT_XPASS 3 -/* Treat skip as pass */ #define KSFT_SKIP 4 /* counters */ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 4202139d81d9..5c34752e1cff 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ cr4_cpuid_sync_test +platform_info_test set_sregs_test sync_regs_test vmx_tsc_adjust_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 03b0f551bedf..ec32dad3c3f0 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -6,7 +6,8 @@ UNAME_M := $(shell uname -m) LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c LIBKVM_x86_64 = lib/x86.c lib/vmx.c -TEST_GEN_PROGS_x86_64 = set_sregs_test +TEST_GEN_PROGS_x86_64 = platform_info_test +TEST_GEN_PROGS_x86_64 += set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test @@ -20,7 +21,7 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I.. -LDFLAGS += -lpthread +LDFLAGS += -pthread # After inclusion, $(OUTPUT) is defined and # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ @@ -37,9 +38,6 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) $(AR) crs $@ $^ -$(LINUX_HDR_PATH): - make -C $(top_srcdir) headers_install - -all: $(STATIC_LIBS) $(LINUX_HDR_PATH) +all: $(STATIC_LIBS) $(TEST_GEN_PROGS): $(STATIC_LIBS) -$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH) +$(STATIC_LIBS):| khdr diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bb5a25fb82c6..3acf9a91704c 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -50,6 +50,7 @@ enum vm_mem_backing_src_type { }; int kvm_check_cap(long cap); +int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); @@ -108,6 +109,9 @@ void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value); const char *exit_reason_str(unsigned int exit_reason); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index e9ba389c48db..6fd8c089cafc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -63,6 +63,29 @@ int kvm_check_cap(long cap) return ret; } +/* VM Enable Capability + * + * Input Args: + * vm - Virtual Machine + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VM. + */ +int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) +{ + int ret; + + ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + return ret; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); @@ -1220,6 +1243,72 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, ret, errno); } +/* VCPU Get MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * + * Output Args: None + * + * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. + * + * Get value of MSR for VCPU. + */ +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); + + return buffer.entry.data; +} + +/* VCPU Set MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * msr_value - New value of MSR + * + * Output Args: None + * + * Return: On success, nothing. On failure a TEST_ASSERT is produced. + * + * Set value of MSR for VCPU. + */ +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + memset(&buffer, 0, sizeof(buffer)); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + buffer.entry.data = msr_value; + r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); +} + /* VM VCPU Args Set * * Input Args: diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c new file mode 100644 index 000000000000..3764e7121265 --- /dev/null +++ b/tools/testing/selftests/kvm/platform_info_test.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_CAP_MSR_PLATFORM_INFO + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of controlling guest access to + * MSR_PLATFORM_INFO. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" + +#define VCPU_ID 0 +#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 + +static void guest_code(void) +{ + uint64_t msr_platform_info; + + for (;;) { + msr_platform_info = rdmsr(MSR_PLATFORM_INFO); + GUEST_SYNC(msr_platform_info); + asm volatile ("inc %r11"); + } +} + +static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable) +{ + struct kvm_enable_cap cap = {}; + + cap.cap = KVM_CAP_MSR_PLATFORM_INFO; + cap.flags = 0; + cap.args[0] = (int)enable; + vm_enable_cap(vm, &cap); +} + +static void test_msr_platform_info_enabled(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct guest_args args; + + set_msr_platform_info_enabled(vm, true); + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + guest_args_read(vm, VCPU_ID, &args); + TEST_ASSERT(args.port == GUEST_PORT_SYNC, + "Received IO from port other than PORT_HOST_SYNC: %u\n", + run->io.port); + TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == + MSR_PLATFORM_INFO_MAX_TURBO_RATIO, + "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", + MSR_PLATFORM_INFO_MAX_TURBO_RATIO); +} + +static void test_msr_platform_info_disabled(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + set_msr_platform_info_enabled(vm, false); + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, + "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *state; + int rv; + uint64_t msr_platform_info; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO); + if (!rv) { + fprintf(stderr, + "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, guest_code); + + msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO); + vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, + msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + test_msr_platform_info_disabled(vm); + test_msr_platform_info_enabled(vm); + vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 17ab36605a8e..0a8e75886224 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -16,8 +16,20 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) +top_srcdir ?= ../../../.. +include $(top_srcdir)/scripts/subarch.include +ARCH ?= $(SUBARCH) + all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) +.PHONY: khdr +khdr: + make ARCH=$(ARCH) -C $(top_srcdir) headers_install + +ifdef KSFT_KHDR_INSTALL +$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES):| khdr +endif + .ONESHELL: define RUN_TEST_PRINT_RESULT TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \ diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config index 2fde30191a47..a7e8cd5bb265 100644 --- a/tools/testing/selftests/memory-hotplug/config +++ b/tools/testing/selftests/memory-hotplug/config @@ -2,3 +2,4 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y CONFIG_NOTIFIER_ERROR_INJECTION=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_MEMORY_HOTREMOVE=y diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cccdb2295567..256d82d5fa87 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -15,6 +15,7 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +KSFT_KHDR_INSTALL := 1 include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 0f45633bd634..802b4af18729 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,11 +9,11 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric" +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns testns" +IP="ip -netns ns1" log_test() { @@ -47,8 +47,10 @@ log_test() setup() { set -e - ip netns add testns + ip netns add ns1 $IP link set dev lo up + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -61,7 +63,8 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + ip netns del ns1 + ip netns del ns2 &> /dev/null } get_linklocal() @@ -639,11 +642,14 @@ add_initial_route6() check_route6() { - local pfx="2001:db8:104::/64" + local pfx local expected="$1" local out local rc=0 + set -- $expected + pfx=$1 + out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') [ "${out}" = "${expected}" ] && return 0 @@ -690,28 +696,33 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - $IP li add red up type vrf table 101 + ip netns add ns2 + ip -netns ns2 link set dev lo up + ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 vrf red up - $IP li set veth4 vrf red up - $IP li add dummy1 type dummy - $IP li set dummy1 vrf red up - - $IP -6 addr add 2001:db8:101::1/64 dev veth1 - $IP -6 addr add 2001:db8:101::2/64 dev veth2 - $IP -6 addr add 2001:db8:103::1/64 dev veth3 - $IP -6 addr add 2001:db8:103::2/64 dev veth4 - $IP -6 addr add 2001:db8:104::1/64 dev dummy1 + $IP li set veth2 netns ns2 up + $IP li set veth4 netns ns2 up + ip -netns ns2 li add dummy1 type dummy + ip -netns ns2 li set dummy1 up + $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad + $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 - $IP addr add 172.16.101.2/24 dev veth2 $IP addr add 172.16.103.1/24 dev veth3 - $IP addr add 172.16.103.2/24 dev veth4 - $IP addr add 172.16.104.1/24 dev dummy1 + + ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + + ip -netns ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 set +ex } @@ -944,7 +955,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then @@ -967,6 +978,77 @@ ipv6_addr_metric_test() cleanup } +ipv6_route_metrics_test() +{ + local rc + + echo + echo "IPv6 routes with metrics" + + route_setup + + # + # single path with metrics + # + run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400" + rc=$? + fi + log_test $rc 0 "Single path route with mtu metric" + + + # + # multipath via separate routes with metrics + # + run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400" + run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first" + + # second route is coalesced to first to make a multipath route. + # MTU of the second path is hidden from display! + run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd" + + run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2" + if [ $? -eq 0 ]; then + check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400" + log_test $? 0 " MTU of second leg" + fi + + # + # multipath with metrics + # + run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route with mtu metric" + + $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 + run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1" + log_test $? 0 "Using route with mtu metric" + + run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" + log_test $? 2 "Invalid metric (fails metric_convert)" + + route_cleanup +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -1005,11 +1087,15 @@ add_initial_route() check_route() { - local pfx="172.16.104.0/24" + local pfx local expected="$1" local out local rc=0 + set -- $expected + pfx=$1 + [ "${pfx}" = "unreachable" ] && pfx=$2 + out=$($IP ro ls match ${pfx}) [ "${out}" = "${expected}" ] && return 0 @@ -1319,6 +1405,43 @@ ipv4_addr_metric_test() cleanup } +ipv4_route_metrics_test() +{ + local rc + + echo + echo "IPv4 route add / append tests" + + route_setup + + run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400" + rc=$? + fi + log_test $rc 0 "Single path route with mtu metric" + + + run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route with mtu metric" + + $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 + run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + log_test $? 0 "Using route with mtu metric" + + run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" + log_test $? 2 "Invalid metric (fails metric_convert)" + + route_cleanup +} + + ################################################################################ # usage @@ -1385,6 +1508,8 @@ do ipv4_route_test|ipv4_rt) ipv4_route_test;; ipv6_addr_metric) ipv6_addr_metric_test;; ipv4_addr_metric) ipv4_addr_metric_test;; + ipv6_route_metrics) ipv6_route_metrics_test;; + ipv4_route_metrics) ipv4_route_metrics_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh new file mode 100755 index 000000000000..1f8ef0eff862 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="sticky" +NUM_NETIFS=4 +TEST_MAC=de:ad:be:ef:13:37 +source lib.sh + +switch_create() +{ + ip link add dev br0 type bridge + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $h1 up + ip link set dev $swp1 up + ip link set dev $h2 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $h2 down + ip link set dev $swp1 down + ip link set dev $h1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + switch_create +} + +cleanup() +{ + pre_cleanup + switch_destroy +} + +sticky() +{ + bridge fdb add $TEST_MAC dev $swp1 master static sticky + check_err $? "Could not add fdb entry" + bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky + $MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q + bridge -j fdb show br br0 brport $swp1\ + | jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null + check_err $? "Did not find FDB record when should" + + log_test "Sticky fdb entry" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ca53b539aa2d..0e73698b2048 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -494,6 +494,14 @@ tc_rule_stats_get() | jq '.[1].options.actions[].stats.packets' } +ethtool_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 +} + mac_get() { local if_name=$1 @@ -541,6 +549,23 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +declare -A MTU_ORIG +mtu_set() +{ + local dev=$1; shift + local mtu=$1; shift + + MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu') + ip link set dev $dev mtu $mtu +} + +mtu_restore() +{ + local dev=$1; shift + + ip link set dev $dev mtu ${MTU_ORIG["$dev"]} +} + tc_offload_check() { local num_netifs=${1:-$NUM_NETIFS} diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c index 55fdcdc78eef..61ae2782388e 100644 --- a/tools/testing/selftests/net/ip_defrag.c +++ b/tools/testing/selftests/net/ip_defrag.c @@ -23,21 +23,28 @@ static bool cfg_overlap; static unsigned short cfg_port = 9000; const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; +const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; #define IP4_HLEN (sizeof(struct iphdr)) #define IP6_HLEN (sizeof(struct ip6_hdr)) #define UDP_HLEN (sizeof(struct udphdr)) -static int msg_len; +/* IPv6 fragment header lenth. */ +#define FRAG_HLEN 8 + +static int payload_len; static int max_frag_len; #define MSG_LEN_MAX 60000 /* Max UDP payload length. */ #define IP4_MF (1u << 13) /* IPv4 MF flag. */ +#define IP6_MF (1) /* IPv6 MF flag. */ + +#define CSUM_MANGLED_0 (0xffff) static uint8_t udp_payload[MSG_LEN_MAX]; static uint8_t ip_frame[IP_MAXPACKET]; -static uint16_t ip_id = 0xabcd; +static uint32_t ip_id = 0xabcd; static int msg_counter; static int frag_counter; static unsigned int seed; @@ -48,25 +55,25 @@ static void recv_validate_udp(int fd_udp) ssize_t ret; static uint8_t recv_buff[MSG_LEN_MAX]; - ret = recv(fd_udp, recv_buff, msg_len, 0); + ret = recv(fd_udp, recv_buff, payload_len, 0); msg_counter++; if (cfg_overlap) { if (ret != -1) - error(1, 0, "recv: expected timeout; got %d; seed = %u", - (int)ret, seed); + error(1, 0, "recv: expected timeout; got %d", + (int)ret); if (errno != ETIMEDOUT && errno != EAGAIN) - error(1, errno, "recv: expected timeout: %d; seed = %u", - errno, seed); + error(1, errno, "recv: expected timeout: %d", + errno); return; /* OK */ } if (ret == -1) - error(1, errno, "recv: msg_len = %d max_frag_len = %d", - msg_len, max_frag_len); - if (ret != msg_len) - error(1, 0, "recv: wrong size: %d vs %d", (int)ret, msg_len); - if (memcmp(udp_payload, recv_buff, msg_len)) + error(1, errno, "recv: payload_len = %d max_frag_len = %d", + payload_len, max_frag_len); + if (ret != payload_len) + error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len); + if (memcmp(udp_payload, recv_buff, payload_len)) error(1, 0, "recv: wrong data"); } @@ -92,31 +99,95 @@ static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum) static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr) { uint32_t sum = 0; + uint16_t res; sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src), - IPPROTO_UDP + (uint32_t)(UDP_HLEN + msg_len)); - sum = raw_checksum((uint8_t *)udp_payload, msg_len, sum); + IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len)); + sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum); + sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum); + res = 0xffff & ~sum; + if (res) + return htons(res); + else + return CSUM_MANGLED_0; +} + +static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr) +{ + uint32_t sum = 0; + uint16_t res; + + sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src), + IPPROTO_UDP); + sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum); sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum); - return htons(0xffff & ~sum); + sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum); + res = 0xffff & ~sum; + if (res) + return htons(res); + else + return CSUM_MANGLED_0; } static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen, - struct ip *iphdr, int offset) + int offset, bool ipv6) { int frag_len; int res; + int payload_offset = offset > 0 ? offset - UDP_HLEN : 0; + uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN : + ip_frame + IP4_HLEN; + + if (offset == 0) { + struct udphdr udphdr; + udphdr.source = htons(cfg_port + 1); + udphdr.dest = htons(cfg_port); + udphdr.len = htons(UDP_HLEN + payload_len); + udphdr.check = 0; + if (ipv6) + udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr); + else + udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr); + memcpy(frag_start, &udphdr, UDP_HLEN); + } - if (msg_len - offset <= max_frag_len) { - /* This is the last fragment. */ - frag_len = IP4_HLEN + msg_len - offset; - iphdr->ip_off = htons((offset + UDP_HLEN) / 8); + if (ipv6) { + struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + if (payload_len - payload_offset <= max_frag_len && offset > 0) { + /* This is the last fragment. */ + frag_len = FRAG_HLEN + payload_len - payload_offset; + fraghdr->ip6f_offlg = htons(offset); + } else { + frag_len = FRAG_HLEN + max_frag_len; + fraghdr->ip6f_offlg = htons(offset | IP6_MF); + } + ip6hdr->ip6_plen = htons(frag_len); + if (offset == 0) + memcpy(frag_start + UDP_HLEN, udp_payload, + frag_len - FRAG_HLEN - UDP_HLEN); + else + memcpy(frag_start, udp_payload + payload_offset, + frag_len - FRAG_HLEN); + frag_len += IP6_HLEN; } else { - frag_len = IP4_HLEN + max_frag_len; - iphdr->ip_off = htons((offset + UDP_HLEN) / 8 | IP4_MF); + struct ip *iphdr = (struct ip *)ip_frame; + if (payload_len - payload_offset <= max_frag_len && offset > 0) { + /* This is the last fragment. */ + frag_len = IP4_HLEN + payload_len - payload_offset; + iphdr->ip_off = htons(offset / 8); + } else { + frag_len = IP4_HLEN + max_frag_len; + iphdr->ip_off = htons(offset / 8 | IP4_MF); + } + iphdr->ip_len = htons(frag_len); + if (offset == 0) + memcpy(frag_start + UDP_HLEN, udp_payload, + frag_len - IP4_HLEN - UDP_HLEN); + else + memcpy(frag_start, udp_payload + payload_offset, + frag_len - IP4_HLEN); } - iphdr->ip_len = htons(frag_len); - memcpy(ip_frame + IP4_HLEN, udp_payload + offset, - frag_len - IP4_HLEN); res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); if (res < 0) @@ -127,10 +198,11 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen, frag_counter++; } -static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t alen) +static void send_udp_frags(int fd_raw, struct sockaddr *addr, + socklen_t alen, bool ipv6) { struct ip *iphdr = (struct ip *)ip_frame; - struct udphdr udphdr; + struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; int res; int offset; int frag_len; @@ -142,31 +214,55 @@ static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t alen) * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then * even fragments (0th, 2nd, etc.) are sent out. */ - memset(iphdr, 0, sizeof(*iphdr)); - iphdr->ip_hl = 5; - iphdr->ip_v = 4; - iphdr->ip_tos = 0; - iphdr->ip_id = htons(ip_id++); - iphdr->ip_ttl = 0x40; - iphdr->ip_p = IPPROTO_UDP; - iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK); - iphdr->ip_dst = addr4; - iphdr->ip_sum = 0; + if (ipv6) { + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + ((struct sockaddr_in6 *)addr)->sin6_port = 0; + memset(ip6hdr, 0, sizeof(*ip6hdr)); + ip6hdr->ip6_flow = htonl(6<<28); /* Version. */ + ip6hdr->ip6_nxt = IPPROTO_FRAGMENT; + ip6hdr->ip6_hops = 255; + ip6hdr->ip6_src = addr6; + ip6hdr->ip6_dst = addr6; + fraghdr->ip6f_nxt = IPPROTO_UDP; + fraghdr->ip6f_reserved = 0; + fraghdr->ip6f_ident = htonl(ip_id++); + } else { + memset(iphdr, 0, sizeof(*iphdr)); + iphdr->ip_hl = 5; + iphdr->ip_v = 4; + iphdr->ip_tos = 0; + iphdr->ip_id = htons(ip_id++); + iphdr->ip_ttl = 0x40; + iphdr->ip_p = IPPROTO_UDP; + iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK); + iphdr->ip_dst = addr4; + iphdr->ip_sum = 0; + } /* Odd fragments. */ - offset = 0; - while (offset < msg_len) { - send_fragment(fd_raw, addr, alen, iphdr, offset); + offset = max_frag_len; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); offset += 2 * max_frag_len; } if (cfg_overlap) { /* Send an extra random fragment. */ - offset = rand() % (UDP_HLEN + msg_len - 1); + offset = rand() % (UDP_HLEN + payload_len - 1); /* sendto() returns EINVAL if offset + frag_len is too small. */ - frag_len = IP4_HLEN + UDP_HLEN + rand() % 256; - iphdr->ip_off = htons(offset / 8 | IP4_MF); - iphdr->ip_len = htons(frag_len); + if (ipv6) { + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + frag_len = max_frag_len + rand() % 256; + /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */ + frag_len &= ~0x7; + fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF); + ip6hdr->ip6_plen = htons(frag_len); + frag_len += IP6_HLEN; + } else { + frag_len = IP4_HLEN + UDP_HLEN + rand() % 256; + iphdr->ip_off = htons(offset / 8 | IP4_MF); + iphdr->ip_len = htons(frag_len); + } res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); if (res < 0) error(1, errno, "sendto overlap"); @@ -175,48 +271,26 @@ static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t alen) frag_counter++; } - /* Zeroth fragment (UDP header). */ - frag_len = IP4_HLEN + UDP_HLEN; - iphdr->ip_len = htons(frag_len); - iphdr->ip_off = htons(IP4_MF); - - udphdr.source = htons(cfg_port + 1); - udphdr.dest = htons(cfg_port); - udphdr.len = htons(UDP_HLEN + msg_len); - udphdr.check = 0; - udphdr.check = udp_checksum(iphdr, &udphdr); - - memcpy(ip_frame + IP4_HLEN, &udphdr, UDP_HLEN); - res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); - if (res < 0) - error(1, errno, "sendto UDP header"); - if (res != frag_len) - error(1, 0, "sendto UDP header: %d vs %d", (int)res, frag_len); - frag_counter++; - - /* Even fragments. */ - offset = max_frag_len; - while (offset < msg_len) { - send_fragment(fd_raw, addr, alen, iphdr, offset); + /* Event fragments. */ + offset = 0; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); offset += 2 * max_frag_len; } } -static void run_test(struct sockaddr *addr, socklen_t alen) +static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6) { - int fd_tx_udp, fd_tx_raw, fd_rx_udp; + int fd_tx_raw, fd_rx_udp; struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 }; int idx; + int min_frag_len = ipv6 ? 1280 : 8; /* Initialize the payload. */ for (idx = 0; idx < MSG_LEN_MAX; ++idx) udp_payload[idx] = idx % 256; /* Open sockets. */ - fd_tx_udp = socket(addr->sa_family, SOCK_DGRAM, 0); - if (fd_tx_udp == -1) - error(1, errno, "socket tx_udp"); - fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW); if (fd_tx_raw == -1) error(1, errno, "socket tx_raw"); @@ -230,22 +304,21 @@ static void run_test(struct sockaddr *addr, socklen_t alen) if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) error(1, errno, "setsockopt rcv timeout"); - for (msg_len = 1; msg_len < MSG_LEN_MAX; msg_len += (rand() % 4096)) { + for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX; + payload_len += (rand() % 4096)) { if (cfg_verbose) - printf("msg_len: %d\n", msg_len); - max_frag_len = addr->sa_family == AF_INET ? 8 : 1280; - for (; max_frag_len < 1500 && max_frag_len <= msg_len; - max_frag_len += 8) { - send_udp_frags_v4(fd_tx_raw, addr, alen); + printf("payload_len: %d\n", payload_len); + max_frag_len = min_frag_len; + do { + send_udp_frags(fd_tx_raw, addr, alen, ipv6); recv_validate_udp(fd_rx_udp); - } + max_frag_len += 8 * (rand() % 8); + } while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len); } /* Cleanup. */ if (close(fd_tx_raw)) error(1, errno, "close tx_raw"); - if (close(fd_tx_udp)) - error(1, errno, "close tx_udp"); if (close(fd_rx_udp)) error(1, errno, "close rx_udp"); @@ -265,13 +338,18 @@ static void run_test_v4(void) addr.sin_port = htons(cfg_port); addr.sin_addr = addr4; - run_test((void *)&addr, sizeof(addr)); + run_test((void *)&addr, sizeof(addr), false /* !ipv6 */); } static void run_test_v6(void) { - fprintf(stderr, "NOT IMPL.\n"); - exit(1); + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr), true /* ipv6 */); } static void parse_opts(int argc, char **argv) @@ -303,6 +381,8 @@ int main(int argc, char **argv) parse_opts(argc, argv); seed = time(NULL); srand(seed); + /* Print the seed to track/reproduce potential failures. */ + printf("seed = %d\n", seed); if (cfg_do_ipv4) run_test_v4(); diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh index 78743adcca9e..3a4042d918f0 100755 --- a/tools/testing/selftests/net/ip_defrag.sh +++ b/tools/testing/selftests/net/ip_defrag.sh @@ -6,23 +6,34 @@ set +x set -e -echo "ipv4 defrag" +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 &> /dev/null + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 &> /dev/null +} -run_v4() { -sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null -sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null -./ip_defrag -4 +cleanup() { + ip netns del "${NETNS}" } -export -f run_v4 -./in_netns.sh "run_v4" +trap cleanup EXIT +setup + +echo "ipv4 defrag" +ip netns exec "${NETNS}" ./ip_defrag -4 + echo "ipv4 defrag with overlaps" -run_v4o() { -sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null -sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null -./ip_defrag -4o -} -export -f run_v4o +ip netns exec "${NETNS}" ./ip_defrag -4o + +echo "ipv6 defrag" +ip netns exec "${NETNS}" ./ip_defrag -6 + +echo "ipv6 defrag with overlaps" +ip netns exec "${NETNS}" ./ip_defrag -6o -./in_netns.sh "run_v4o" diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 32a194e3e07a..b9cdb68df4c5 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -6,6 +6,26 @@ # # Tests currently implemented: # +# - pmtu_ipv4 +# Set up two namespaces, A and B, with two paths between them over routers +# R1 and R2 (also implemented with namespaces), with different MTUs: +# +# segment a_r1 segment b_r1 a_r1: 2000 +# .--------------R1--------------. a_r2: 1500 +# A B a_r3: 2000 +# '--------------R2--------------' a_r4: 1400 +# segment a_r2 segment b_r2 +# +# Check that PMTU exceptions with the correct PMTU are created. Then +# decrease and increase the MTU of the local link for one of the paths, +# A to R1, checking that route exception PMTU changes accordingly over +# this path. Also check that locked exceptions are created when an ICMP +# message advertising a PMTU smaller than net.ipv4.route.min_pmtu is +# received +# +# - pmtu_ipv6 +# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -50,6 +70,8 @@ ksft_skip=4 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) tests=" + pmtu_ipv4_exception ipv4: PMTU exceptions + pmtu_ipv6_exception ipv6: PMTU exceptions pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions pmtu_vti4_default_mtu vti4: default MTU assignment @@ -60,8 +82,45 @@ tests=" NS_A="ns-$(mktemp -u XXXXXX)" NS_B="ns-$(mktemp -u XXXXXX)" +NS_R1="ns-$(mktemp -u XXXXXX)" +NS_R2="ns-$(mktemp -u XXXXXX)" ns_a="ip netns exec ${NS_A}" ns_b="ip netns exec ${NS_B}" +ns_r1="ip netns exec ${NS_R1}" +ns_r2="ip netns exec ${NS_R2}" + +# Addressing and routing for tests with routers: four network segments, with +# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an +# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). +# Addresses are: +# - IPv4: PREFIX4.SEGMENT.ID (/24) +# - IPv6: PREFIX6:SEGMENT::ID (/64) +prefix4="192.168" +prefix6="fd00" +a_r1=1 +a_r2=2 +b_r1=3 +b_r2=4 +# ns peer segment +routing_addrs=" + A R1 ${a_r1} + A R2 ${a_r2} + B R1 ${b_r1} + B R2 ${b_r2} +" +# Traffic from A to B goes through R1 by default, and through R2, if destined to +# B's address on the b_r2 segment. +# Traffic from B to A goes through R1. +# ns destination gateway +routes=" + A default ${prefix4}.${a_r1}.2 + A ${prefix4}.${b_r2}.1 ${prefix4}.${a_r2}.2 + B default ${prefix4}.${b_r1}.2 + + A default ${prefix6}:${a_r1}::2 + A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 + B default ${prefix6}:${b_r1}::2 +" veth4_a_addr="192.168.1.1" veth4_b_addr="192.168.1.2" @@ -94,9 +153,15 @@ err_flush() { err_buf= } +# Find the auto-generated name for this namespace +nsname() { + eval echo \$NS_$1 +} + setup_namespaces() { - ip netns add ${NS_A} || return 1 - ip netns add ${NS_B} + for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + ip netns add ${n} || return 1 + done } setup_veth() { @@ -167,6 +232,49 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_routing() { + for i in ${NS_R1} ${NS_R2}; do + ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1 + ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1 + done + + for i in ${routing_addrs}; do + [ "${ns}" = "" ] && ns="${i}" && continue + [ "${peer}" = "" ] && peer="${i}" && continue + [ "${segment}" = "" ] && segment="${i}" + + ns_name="$(nsname ${ns})" + peer_name="$(nsname ${peer})" + if="veth_${ns}-${peer}" + ifpeer="veth_${peer}-${ns}" + + # Create veth links + ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1 + ip -n ${peer_name} link set dev ${ifpeer} up + + # Add addresses + ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if} + ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if} + + ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer} + ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer} + + ns=""; peer=""; segment="" + done + + for i in ${routes}; do + [ "${ns}" = "" ] && ns="${i}" && continue + [ "${addr}" = "" ] && addr="${i}" && continue + [ "${gw}" = "" ] && gw="${i}" + + ns_name="$(nsname ${ns})" + + ip -n ${ns_name} route add ${addr} via ${gw} + + ns=""; addr=""; gw="" + done +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -178,8 +286,9 @@ setup() { cleanup() { [ ${cleanup_done} -eq 1 ] && return - ip netns del ${NS_A} 2 > /dev/null - ip netns del ${NS_B} 2 > /dev/null + for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + ip netns del ${n} 2> /dev/null + done cleanup_done=1 } @@ -196,7 +305,9 @@ mtu_parse() { next=0 for i in ${input}; do + [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue [ ${next} -eq 1 ] && echo "${i}" && return + [ ${next} -eq 2 ] && echo "lock ${i}" && return [ "${i}" = "mtu" ] && next=1 done } @@ -229,6 +340,109 @@ route_get_dst_pmtu_from_exception() { mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" } +check_pmtu_value() { + expected="${1}" + value="${2}" + event="${3}" + + [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0 + [ "${value}" = "${expected}" ] && return 0 + [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1 + [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1 + err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}" + return 1 +} + +test_pmtu_ipvX() { + family=${1} + + setup namespaces routing || return 2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Decrease local MTU below PMTU, check for PMTU decrease in route exception + mtu "${ns_a}" veth_A-R1 1300 + mtu "${ns_r1}" veth_R1-A 1300 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1 + # Second exception shouldn't be modified + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 + + # Increase MTU, check for PMTU increase in route exception + mtu "${ns_a}" veth_A-R1 1700 + mtu "${ns_r1}" veth_R1-A 1700 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1 + # Second exception shouldn't be modified + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 + + # Skip PMTU locking tests for IPv6 + [ $family -eq 6 ] && return 0 + + # Decrease remote MTU on path via R2, get new exception + mtu "${ns_r2}" veth_R2-B 400 + mtu "${ns_b}" veth_B-R2 400 + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 + + # Decrease local MTU below PMTU + mtu "${ns_a}" veth_A-R2 500 + mtu "${ns_r2}" veth_R2-A 500 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1 + + # Increase local MTU + mtu "${ns_a}" veth_A-R2 1500 + mtu "${ns_r2}" veth_R2-A 1500 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1 + + # Get new exception + ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 +} + +test_pmtu_ipv4_exception() { + test_pmtu_ipvX 4 +} + +test_pmtu_ipv6_exception() { + test_pmtu_ipvX 6 +} + test_pmtu_vti4_exception() { setup namespaces veth vti4 xfrm4 || return 2 @@ -248,24 +462,13 @@ test_pmtu_vti4_exception() { # exception is created ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" - if [ "${pmtu}" != "" ]; then - err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}" - return 1 - fi + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" - if [ "${pmtu}" = "" ]; then - err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))" - return 1 - fi - - # ...with the right PMTU value - if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then - err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}" - return 1 - fi + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" } test_pmtu_vti6_exception() { @@ -280,25 +483,18 @@ test_pmtu_vti6_exception() { ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null # Check that exception was created - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then - err " tunnel exceeding link layer MTU didn't create route exception" - return 1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 # Decrease tunnel MTU, check for PMTU decrease in route exception mtu "${ns_a}" vti6_a 3000 - - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then - err " decreasing tunnel MTU didn't decrease route exception PMTU" - fail=1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 # Increase tunnel MTU, check for PMTU increase in route exception mtu "${ns_a}" vti6_a 9000 - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then - err " increasing tunnel MTU didn't increase route exception PMTU" - fail=1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 return ${fail} } diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 07daff076ce0..fac68d710f35 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -121,11 +121,11 @@ TEST_F(tls, send_then_sendfile) buf = (char *)malloc(st.st_size); EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send); - EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send); + EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send); EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); - EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size); + EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); } TEST_F(tls, recv_max) @@ -160,7 +160,7 @@ TEST_F(tls, msg_more) EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT), + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL), send_len * 2); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -180,7 +180,7 @@ TEST_F(tls, sendmsg_single) msg.msg_iov = &vec; msg.msg_iovlen = 1; EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); - EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -288,7 +288,7 @@ TEST_F(tls, splice_from_pipe) ASSERT_GE(pipe(p), 0); EXPECT_GE(write(p[1], mem_send, send_len), 0); EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -306,7 +306,7 @@ TEST_F(tls, splice_from_pipe2) EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -322,13 +322,13 @@ TEST_F(tls, send_and_splice) ASSERT_GE(pipe(p), 0); EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2); - EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2); EXPECT_EQ(memcmp(test_str, buf, send_len2), 0); EXPECT_GE(write(p[1], mem_send, send_len), send_len); EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -436,7 +436,7 @@ TEST_F(tls, multiple_send_single_recv) EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); - EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len); + EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len); EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0); EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0); @@ -502,6 +502,55 @@ TEST_F(tls, recv_peek_multiple) EXPECT_EQ(memcmp(test_str, buf, send_len), 0); } +TEST_F(tls, recv_peek_multiple_records) +{ + char const *test_str = "test_read_peek_mult_recs"; + char const *test_str_first = "test_read_peek"; + char const *test_str_second = "_mult_recs"; + int len; + char buf[64]; + + len = strlen(test_str_first); + EXPECT_EQ(send(self->fd, test_str_first, len, 0), len); + + len = strlen(test_str_second) + 1; + EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); + + len = strlen(test_str_first); + memset(buf, 0, len); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len); + + /* MSG_PEEK can only peek into the current record. */ + len = strlen(test_str_first); + EXPECT_EQ(memcmp(test_str_first, buf, len), 0); + + len = strlen(test_str) + 1; + memset(buf, 0, len); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len); + + /* Non-MSG_PEEK will advance strparser (and therefore record) + * however. + */ + len = strlen(test_str) + 1; + EXPECT_EQ(memcmp(test_str, buf, len), 0); + + /* MSG_MORE will hold current record open, so later MSG_PEEK + * will see everything. + */ + len = strlen(test_str_first); + EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len); + + len = strlen(test_str_second) + 1; + EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); + + len = strlen(test_str) + 1; + memset(buf, 0, len); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len); + + len = strlen(test_str) + 1; + EXPECT_EQ(memcmp(test_str, buf, len), 0); +} + TEST_F(tls, recv_peek_large_buf_mult_recs) { char const *test_str = "test_read_peek_mult_recs"; @@ -524,6 +573,7 @@ TEST_F(tls, recv_peek_large_buf_mult_recs) EXPECT_EQ(memcmp(test_str, buf, len), 0); } + TEST_F(tls, pollin) { char const *test_str = "test_poll"; @@ -537,7 +587,7 @@ TEST_F(tls, pollin) EXPECT_EQ(poll(&fd, 1, 20), 1); EXPECT_EQ(fd.revents & POLLIN, 1); - EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); /* Test timing out */ EXPECT_EQ(poll(&fd, 1, 20), 0); } @@ -555,7 +605,7 @@ TEST_F(tls, poll_wait) /* Set timeout to inf. secs */ EXPECT_EQ(poll(&fd, 1, -1), 1); EXPECT_EQ(fd.revents & POLLIN, 1); - EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len); } TEST_F(tls, blocking) @@ -701,7 +751,7 @@ TEST_F(tls, control_msg) EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); cmsg = CMSG_FIRSTHDR(&msg); EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index a728040edbe1..14cfcf006936 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -5,6 +5,7 @@ TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp all: $(TEST_PROGS) +top_srcdir = ../../../../.. include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 93baacab7693..d056486f49de 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -1,5 +1,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index b4d7432a0ecd..d40300a65b42 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES := exec_target CFLAGS += -O2 +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 1be547434a49..ede4d3dae750 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -5,6 +5,7 @@ all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c ../utils.c +top_srcdir = ../../../../.. include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 1cf89a34d97c..44574f3818b3 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -17,6 +17,7 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ EXTRA_SOURCES := validate.c ../harness.c stubs.S +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 55d7db7a616b..5df476364b4d 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ dscr_sysfs_thread_test +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 0dd3a01fdab9..11a10d7a2bbd 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 8ebbe96d80a8..33ced6e0ad25 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors TEST_GEN_FILES := tempfile +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 6e1629bf5b09..19046db995fe 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -5,6 +5,7 @@ noarg: TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c +top_srcdir = ../../../../.. include ../../lib.mk all: $(TEST_GEN_PROGS) ebb diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index c4e64bc2e265..bd5dfa509272 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -17,6 +17,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ lost_exception_test no_handler_test \ cycles_with_mmcr2_test +top_srcdir = ../../../../../.. include ../../../lib.mk $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \ diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index 175366db7be8..ea2b7bd09e36 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -2,6 +2,7 @@ CFLAGS += -I$(CURDIR) TEST_GEN_PROGS := load_unaligned_zeropad +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 28f5b781a553..923d531265f8 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -4,6 +4,7 @@ TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ perf-hwbreak +top_srcdir = ../../../../.. include ../../lib.mk all: $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index a7cbd5082e27..1fca25c6ace0 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -8,6 +8,7 @@ $(TEST_PROGS): ../harness.c ../utils.c signal.S CFLAGS += -maltivec signal_tm: CFLAGS += -mhtm +top_srcdir = ../../../../.. include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 10b35c87a4f4..7fc0623d85c3 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -29,6 +29,7 @@ endif ASFLAGS = $(CFLAGS) +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index 30b8ff8fb82e..fcd2dcb8972b 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -5,6 +5,7 @@ ASFLAGS += -O2 -Wall -g -nostdlib -m64 EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S +top_srcdir = ../../../../.. include ../../lib.mk $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index da22ca7c38c1..161b8846336f 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS := ipc_unmuxed CFLAGS += -I../../../../../usr/include +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index c0e45d2dde25..9fc2cf6fbc92 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile index f8ced26748f8..fb82068c9fda 100644 --- a/tools/testing/selftests/powerpc/vphn/Makefile +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -2,6 +2,7 @@ TEST_GEN_PROGS := test-vphn CFLAGS += -m64 +top_srcdir = ../../../../.. include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 642d4e12abea..eec2663261f2 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -56,15 +56,13 @@ unsigned int yield_mod_cnt, nr_abort; printf(fmt, ## __VA_ARGS__); \ } while (0) -#if defined(__x86_64__) || defined(__i386__) +#ifdef __i386__ #define INJECT_ASM_REG "eax" #define RSEQ_INJECT_CLOBBER \ , INJECT_ASM_REG -#ifdef __i386__ - #define RSEQ_INJECT_ASM(n) \ "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ @@ -76,9 +74,16 @@ unsigned int yield_mod_cnt, nr_abort; #elif defined(__x86_64__) +#define INJECT_ASM_REG_P "rax" +#define INJECT_ASM_REG "eax" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG_P \ + , INJECT_ASM_REG + #define RSEQ_INJECT_ASM(n) \ - "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \ - "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ + "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \ + "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ "jz 333f\n\t" \ "222:\n\t" \ @@ -86,10 +91,6 @@ unsigned int yield_mod_cnt, nr_abort; "jnz 222b\n\t" \ "333:\n\t" -#else -#error "Unsupported architecture" -#endif - #elif defined(__s390__) #define RSEQ_INJECT_INPUT \ diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index 49a6f8c3fdae..f9281e8aa313 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -232,6 +232,8 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) + - buildebpfPlugin.py: + builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile new file mode 100644 index 000000000000..dc92eb271d9a --- /dev/null +++ b/tools/testing/selftests/tc-testing/bpf/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 + +APIDIR := ../../../../include/uapi +TEST_GEN_FILES = action.o + +top_srcdir = ../../../../.. +include ../../lib.mk + +CLANG ?= clang +LLC ?= llc +PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) + +ifeq ($(PROBE),) + CPU ?= probe +else + CPU ?= generic +endif + +CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + +CLANG_FLAGS = -I. -I$(APIDIR) \ + $(CLANG_SYS_INCLUDES) \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/%.o: %.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c new file mode 100644 index 000000000000..c32b99b80e19 --- /dev/null +++ b/tools/testing/selftests/tc-testing/bpf/action.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Davide Caratti, Red Hat inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> + +__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s) +{ + return TC_ACT_OK; +} + +__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s) +{ + s->data = 0x0; + return TC_ACT_OK; +} + +char _license[] __attribute__((section("license"),used)) = "GPL"; diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py new file mode 100644 index 000000000000..9f0ba10c44b4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -0,0 +1,66 @@ +''' +build ebpf program +''' + +import os +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin +from tdc_config import * + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'buildebpf/SubPlugin' + self.tap = '' + super().__init__() + + def pre_suite(self, testcount, testidlist): + super().pre_suite(testcount, testidlist) + + if self.args.buildebpf: + self._ebpf_makeall() + + def post_suite(self, index): + super().post_suite(index) + + self._ebpf_makeclean() + + def add_args(self, parser): + super().add_args(parser) + + self.argparser_group = self.argparser.add_argument_group( + 'buildebpf', + 'options for buildebpfPlugin') + self.argparser_group.add_argument( + '-B', '--buildebpf', action='store_true', + help='build eBPF programs') + + return self.argparser + + def _ebpf_makeall(self): + if self.args.buildebpf: + self._make('all') + + def _ebpf_makeclean(self): + if self.args.buildebpf: + self._make('clean') + + def _make(self, target): + command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) + proc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=ENVIR) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8") + else: + foutput = rawout.decode("utf-8") + + proc.stdout.close() + proc.stderr.close() + return proc, foutput diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 6f289a49e5ec..5970cee6d05f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -55,7 +55,6 @@ "bpf" ], "setup": [ - "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o", [ "$TC action flush action bpf", 0, @@ -63,14 +62,13 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ - "$TC action flush action bpf", - "rm -f _b.o" + "$TC action flush action bpf" ] }, { @@ -81,7 +79,6 @@ "bpf" ], "setup": [ - "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o", [ "$TC action flush action bpf", 0, @@ -89,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ @@ -100,8 +97,7 @@ 0, 1, 255 - ], - "rm -f _c.o" + ] ] }, { diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index a023d0d62b25..d651bc1501bd 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -16,7 +16,9 @@ NAMES = { 'DEV2': '', 'BATCH_FILE': './batch.txt', # Name of the namespace to use - 'NS': 'tcut' + 'NS': 'tcut', + # Directory containing eBPF test programs + 'EBPFDIR': './bpf' } diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9881876d2aa0..e94b7b14bcb2 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -26,10 +26,6 @@ TEST_PROGS := run_vmtests include ../lib.mk -$(OUTPUT)/userfaultfd: ../../../../usr/include/linux/kernel.h $(OUTPUT)/userfaultfd: LDLIBS += -lpthread $(OUTPUT)/mlock-random-test: LDLIBS += -lcap - -../../../../usr/include/linux/kernel.h: - make -C ../../../.. headers_install diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c index 235259011704..35edd61d1663 100644 --- a/tools/testing/selftests/x86/test_vdso.c +++ b/tools/testing/selftests/x86/test_vdso.c @@ -17,6 +17,7 @@ #include <errno.h> #include <sched.h> #include <stdbool.h> +#include <limits.h> #ifndef SYS_getcpu # ifdef __x86_64__ @@ -31,6 +32,14 @@ int nerrs = 0; +typedef int (*vgettime_t)(clockid_t, struct timespec *); + +vgettime_t vdso_clock_gettime; + +typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); + +vgtod_t vdso_gettimeofday; + typedef long (*getcpu_t)(unsigned *, unsigned *, void *); getcpu_t vgetcpu; @@ -95,6 +104,15 @@ static void fill_function_pointers() printf("Warning: failed to find getcpu in vDSO\n"); vgetcpu = (getcpu_t) vsyscall_getcpu(); + + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + printf("Warning: failed to find clock_gettime in vDSO\n"); + + vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); + if (!vdso_gettimeofday) + printf("Warning: failed to find gettimeofday in vDSO\n"); + } static long sys_getcpu(unsigned * cpu, unsigned * node, @@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node, return syscall(__NR_getcpu, cpu, node, cache); } +static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) +{ + return syscall(__NR_clock_gettime, id, ts); +} + +static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return syscall(__NR_gettimeofday, tv, tz); +} + static void test_getcpu(void) { printf("[RUN]\tTesting getcpu...\n"); @@ -155,10 +183,154 @@ static void test_getcpu(void) } } +static bool ts_leq(const struct timespec *a, const struct timespec *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_nsec <= b->tv_nsec; +} + +static bool tv_leq(const struct timeval *a, const struct timeval *b) +{ + if (a->tv_sec != b->tv_sec) + return a->tv_sec < b->tv_sec; + else + return a->tv_usec <= b->tv_usec; +} + +static char const * const clocknames[] = { + [0] = "CLOCK_REALTIME", + [1] = "CLOCK_MONOTONIC", + [2] = "CLOCK_PROCESS_CPUTIME_ID", + [3] = "CLOCK_THREAD_CPUTIME_ID", + [4] = "CLOCK_MONOTONIC_RAW", + [5] = "CLOCK_REALTIME_COARSE", + [6] = "CLOCK_MONOTONIC_COARSE", + [7] = "CLOCK_BOOTTIME", + [8] = "CLOCK_REALTIME_ALARM", + [9] = "CLOCK_BOOTTIME_ALARM", + [10] = "CLOCK_SGI_CYCLE", + [11] = "CLOCK_TAI", +}; + +static void test_one_clock_gettime(int clock, const char *name) +{ + struct timespec start, vdso, end; + int vdso_ret, end_ret; + + printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); + + if (sys_clock_gettime(clock, &start) < 0) { + if (errno == EINVAL) { + vdso_ret = vdso_clock_gettime(clock, &vdso); + if (vdso_ret == -EINVAL) { + printf("[OK]\tNo such clock.\n"); + } else { + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); + nerrs++; + } + } else { + printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); + } + return; + } + + vdso_ret = vdso_clock_gettime(clock, &vdso); + end_ret = sys_clock_gettime(clock, &end); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + (unsigned long long)start.tv_sec, start.tv_nsec, + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, + (unsigned long long)end.tv_sec, end.tv_nsec); + + if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } +} + +static void test_clock_gettime(void) +{ + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); + clock++) { + test_one_clock_gettime(clock, clocknames[clock]); + } + + /* Also test some invalid clock ids */ + test_one_clock_gettime(-1, "invalid"); + test_one_clock_gettime(INT_MIN, "invalid"); + test_one_clock_gettime(INT_MAX, "invalid"); +} + +static void test_gettimeofday(void) +{ + struct timeval start, vdso, end; + struct timezone sys_tz, vdso_tz; + int vdso_ret, end_ret; + + if (!vdso_gettimeofday) + return; + + printf("[RUN]\tTesting gettimeofday...\n"); + + if (sys_gettimeofday(&start, &sys_tz) < 0) { + printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); + nerrs++; + return; + } + + vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + end_ret = sys_gettimeofday(&end, NULL); + + if (vdso_ret != 0 || end_ret != 0) { + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", + vdso_ret, errno); + nerrs++; + return; + } + + printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", + (unsigned long long)start.tv_sec, start.tv_usec, + (unsigned long long)vdso.tv_sec, vdso.tv_usec, + (unsigned long long)end.tv_sec, end.tv_usec); + + if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { + printf("[FAIL]\tTimes are out of sequence\n"); + nerrs++; + } + + if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && + sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { + printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", + sys_tz.tz_minuteswest, sys_tz.tz_dsttime); + } else { + printf("[FAIL]\ttimezones do not match\n"); + nerrs++; + } + + /* And make sure that passing NULL for tz doesn't crash. */ + vdso_gettimeofday(&vdso, NULL); +} + int main(int argc, char **argv) { fill_function_pointers(); + test_clock_gettime(); + test_gettimeofday(); + + /* + * Test getcpu() last so that, if something goes wrong setting affinity, + * we still run the other tests. + */ test_getcpu(); return nerrs ? 1 : 0; |