aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h13
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h13
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h37
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst139
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst6
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool17
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h8
-rw-r--r--tools/bpf/bpftool/map.c12
-rw-r--r--tools/bpf/bpftool/net.c272
-rw-r--r--tools/bpf/bpftool/netlink_dumper.c174
-rw-r--r--tools/bpf/bpftool/netlink_dumper.h95
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/hv/hv_kvp_daemon.c2
-rw-r--r--tools/include/linux/lockdep.h3
-rw-r--r--tools/include/linux/nmi.h0
-rw-r--r--tools/include/tools/libc_compat.h2
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/drm/drm.h9
-rw-r--r--tools/include/uapi/linux/bpf.h26
-rw-r--r--tools/include/uapi/linux/if_link.h17
-rw-r--r--tools/include/uapi/linux/kvm.h6
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/vhost.h18
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat2
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c129
-rw-r--r--tools/lib/bpf/libbpf.c24
-rw-r--r--tools/lib/bpf/libbpf.h16
-rw-r--r--tools/lib/bpf/libbpf_errno.c1
-rw-r--r--tools/lib/bpf/netlink.c331
-rw-r--r--tools/lib/bpf/nlattr.c33
-rw-r--r--tools/lib/bpf/nlattr.h38
-rw-r--r--tools/lib/bpf/str_error.c18
-rw-r--r--tools/lib/bpf/str_error.h6
-rw-r--r--tools/perf/Documentation/Makefile2
-rw-r--r--tools/perf/Makefile.perf14
-rw-r--r--tools/perf/arch/arm64/Makefile5
-rwxr-xr-xtools/perf/arch/arm64/entry/syscalls/mksyscalltbl6
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c4
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build1
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c6
-rw-r--r--tools/perf/arch/x86/tests/bp-modify.c213
-rw-r--r--tools/perf/util/annotate.c32
-rw-r--r--tools/perf/util/annotate.h1
-rw-r--r--tools/perf/util/evsel.c5
-rw-r--r--tools/perf/util/map.c11
-rw-r--r--tools/perf/util/trace-event-info.c2
-rw-r--r--tools/perf/util/trace-event-parse.c7
-rw-r--r--tools/testing/selftests/android/Makefile2
-rw-r--r--tools/testing/selftests/android/config (renamed from tools/testing/selftests/android/ion/config)0
-rw-r--r--tools/testing/selftests/android/ion/Makefile2
-rw-r--r--tools/testing/selftests/bpf/.gitignore6
-rw-r--r--tools/testing/selftests/bpf/Makefile8
-rw-r--r--tools/testing/selftests/bpf/bpf_flow.c373
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c140
-rw-r--r--tools/testing/selftests/bpf/test_flow_dissector.c782
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh115
-rw-r--r--tools/testing/selftests/bpf/test_maps.c10
-rw-r--r--tools/testing/selftests/bpf/test_progs.c20
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c38
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h1
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c205
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh347
-rw-r--r--tools/testing/selftests/efivarfs/config1
-rw-r--r--tools/testing/selftests/futex/functional/Makefile1
-rw-r--r--tools/testing/selftests/gpio/Makefile7
-rw-r--r--tools/testing/selftests/kselftest.h1
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile12
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h4
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c89
-rw-r--r--tools/testing/selftests/kvm/platform_info_test.c110
-rw-r--r--tools/testing/selftests/lib.mk12
-rw-r--r--tools/testing/selftests/memory-hotplug/config1
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh167
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh69
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh25
-rw-r--r--tools/testing/selftests/net/ip_defrag.c250
-rwxr-xr-xtools/testing/selftests/net/ip_defrag.sh39
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh258
-rw-r--r--tools/testing/selftests/net/tls.c74
-rw-r--r--tools/testing/selftests/networking/timestamping/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/primitives/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile1
-rw-r--r--tools/testing/selftests/rseq/param_test.c19
-rw-r--r--tools/testing/selftests/tc-testing/README2
-rw-r--r--tools/testing/selftests/tc-testing/bpf/Makefile29
-rw-r--r--tools/testing/selftests/tc-testing/bpf/action.c23
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py66
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json16
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py4
-rw-r--r--tools/testing/selftests/vm/Makefile4
-rw-r--r--tools/testing/selftests/x86/test_vdso.c172
116 files changed, 4991 insertions, 419 deletions
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 16e006f708ca..4602464ebdfb 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -125,6 +126,18 @@ struct kvm_sync_regs {
struct kvm_arch_memory_slot {
};
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 serror_pending;
+ __u8 serror_has_esr;
+ /* Align it to 8 bytes */
+ __u8 pad[6];
+ __u64 serror_esr;
+ } exception;
+ __u32 reserved[12];
+};
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 4e76630dd655..97c3478ee6e7 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,7 @@
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -154,6 +155,18 @@ struct kvm_sync_regs {
struct kvm_arch_memory_slot {
};
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 serror_pending;
+ __u8 serror_has_esr;
+ /* Align it to 8 bytes */
+ __u8 pad[6];
+ __u64 serror_esr;
+ } exception;
+ __u32 reserved[12];
+};
+
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..9a50f02b9894 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
/*
* KVM s390 specific structures and definitions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
struct {
__u64 reserved1[2];
__u64 gscb[4];
+ __u64 etoken;
+ __u64 etoken_extension;
};
};
};
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index c535c2fdea13..86299efa804a 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -378,4 +378,41 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
+
+struct kvm_vmx_nested_state {
+ __u64 vmxon_pa;
+ __u64 vmcs_pa;
+
+ struct {
+ __u16 flags;
+ } smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+ /* KVM_STATE_* flags */
+ __u16 flags;
+
+ /* 0 for VMX, 1 for SVM. */
+ __u16 format;
+
+ /* 128 for SVM, 128 + VMCS size for VMX. */
+ __u32 size;
+
+ union {
+ /* VMXON, VMCS */
+ struct kvm_vmx_nested_state vmx;
+
+ /* Pad the header to 128 bytes. */
+ __u8 pad[120];
+ };
+
+ __u8 data[0];
+};
+
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
new file mode 100644
index 000000000000..408ec30d8872
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -0,0 +1,139 @@
+================
+bpftool-net
+================
+-------------------------------------------------------------------------------
+tool for inspection of netdev/tc related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **net** *COMMAND*
+
+ *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+ *COMMANDS* :=
+ { **show** | **list** } [ **dev** name ] | **help**
+
+NET COMMANDS
+============
+
+| **bpftool** **net { show | list } [ dev name ]**
+| **bpftool** **net help**
+
+DESCRIPTION
+===========
+ **bpftool net { show | list } [ dev name ]**
+ List bpf program attachments in the kernel networking subsystem.
+
+ Currently, only device driver xdp attachments and tc filter
+ classification/action attachments are implemented, i.e., for
+ program types **BPF_PROG_TYPE_SCHED_CLS**,
+ **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**.
+ For programs attached to a particular cgroup, e.g.,
+ **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**,
+ **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ users can use **bpftool cgroup** to dump cgroup attachments.
+ For sk_{filter, skb, msg, reuseport} and lwt/seg6
+ bpf programs, users should consult other tools, e.g., iproute2.
+
+ The current output will start with all xdp program attachments, followed by
+ all tc class/qdisc bpf program attachments. Both xdp programs and
+ tc programs are ordered based on ifindex number. If multiple bpf
+ programs attached to the same networking device through **tc filter**,
+ the order will be first all bpf programs attached to tc classes, then
+ all bpf programs attached to non clsact qdiscs, and finally all
+ bpf programs attached to root and clsact qdisc.
+
+ **bpftool net help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool net**
+
+::
+
+ xdp:
+ eth0(2) driver id 198
+
+ tc:
+ eth0(2) htb name prefix_matcher.o:[cls_prefix_matcher_htb] id 111727 act []
+ eth0(2) clsact/ingress fbflow_icmp id 130246 act []
+ eth0(2) clsact/egress prefix_matcher.o:[cls_prefix_matcher_clsact] id 111726
+ eth0(2) clsact/egress cls_fg_dscp id 108619 act []
+ eth0(2) clsact/egress fbflow_egress id 130245
+
+|
+| **# bpftool -jp net**
+
+::
+
+ [{
+ "xdp": [{
+ "devname": "eth0",
+ "ifindex": 2,
+ "mode": "driver",
+ "id": 198
+ }
+ ],
+ "tc": [{
+ "devname": "eth0",
+ "ifindex": 2,
+ "kind": "htb",
+ "name": "prefix_matcher.o:[cls_prefix_matcher_htb]",
+ "id": 111727,
+ "act": []
+ },{
+ "devname": "eth0",
+ "ifindex": 2,
+ "kind": "clsact/ingress",
+ "name": "fbflow_icmp",
+ "id": 130246,
+ "act": []
+ },{
+ "devname": "eth0",
+ "ifindex": 2,
+ "kind": "clsact/egress",
+ "name": "prefix_matcher.o:[cls_prefix_matcher_clsact]",
+ "id": 111726,
+ },{
+ "devname": "eth0",
+ "ifindex": 2,
+ "kind": "clsact/egress",
+ "name": "cls_fg_dscp",
+ "id": 108619,
+ "act": []
+ },{
+ "devname": "eth0",
+ "ifindex": 2,
+ "kind": "clsact/egress",
+ "name": "fbflow_egress",
+ "id": 130245,
+ }
+ ]
+ }
+ ]
+
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index b6f5d560460d..8dda77daeda9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
- *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
+ *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -32,6 +32,8 @@ SYNOPSIS
*PERF-COMMANDS* := { **show** | **list** | **help** }
+ *NET-COMMANDS* := { **show** | **list** | **help** }
+
DESCRIPTION
===========
*bpftool* allows for inspection and simple modification of BPF objects
@@ -58,4 +60,4 @@ OPTIONS
SEE ALSO
========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
- **bpftool-perf**\ (8)
+ **bpftool-perf**\ (8), **bpftool-net**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 598066c40191..df1060b852c1 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -494,10 +494,10 @@ _bpftool()
_filedir
return 0
;;
- tree)
- _filedir
- return 0
- ;;
+ tree)
+ _filedir
+ return 0
+ ;;
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
device bind4 bind6 post_bind4 post_bind6 connect4 \
@@ -552,6 +552,15 @@ _bpftool()
;;
esac
;;
+ net)
+ case $command in
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help \
+ show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d15a62be6cf0..79dc3f193547 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -85,7 +85,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup | perf }\n"
+ " OBJECT := { prog | map | cgroup | perf | net }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -215,6 +215,7 @@ static const struct cmd cmds[] = {
{ "map", do_map },
{ "cgroup", do_cgroup },
{ "perf", do_perf },
+ { "net", do_net },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 238e734d75b3..40492cdc4e53 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -136,6 +136,7 @@ int do_map(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
int do_perf(int argc, char **arg);
+int do_net(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd(int *argc, char ***argv);
@@ -165,4 +166,11 @@ struct btf_dumper {
*/
int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
const void *data);
+
+struct nlattr;
+struct ifinfomsg;
+struct tcmsg;
+int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
+int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
+ const char *devname, int ifindex);
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 9c55077ca5dd..e22fbe8b975f 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -71,6 +71,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_XSKMAP] = "xskmap",
[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
+ [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
};
static bool map_is_per_cpu(__u32 type)
@@ -673,12 +674,6 @@ static int do_dump(int argc, char **argv)
if (fd < 0)
return -1;
- if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) {
- p_err("Dumping maps of maps and program maps not supported");
- close(fd);
- return -1;
- }
-
key = malloc(info.key_size);
value = alloc_value(&info);
if (!key || !value) {
@@ -732,7 +727,9 @@ static int do_dump(int argc, char **argv)
} else {
print_entry_plain(&info, key, value);
}
- } else {
+ num_elems++;
+ } else if (!map_is_map_of_maps(info.type) &&
+ !map_is_map_of_progs(info.type)) {
if (json_output) {
jsonw_name(json_wtr, "key");
print_hex_data_json(key, info.key_size);
@@ -749,7 +746,6 @@ static int do_dump(int argc, char **argv)
}
prev_key = key;
- num_elems++;
}
if (json_output)
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
new file mode 100644
index 000000000000..ed205ee57655
--- /dev/null
+++ b/tools/bpf/bpftool/net.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libbpf.h>
+#include <net/if.h>
+#include <linux/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_bpf.h>
+#include <sys/socket.h>
+
+#include <bpf.h>
+#include <nlattr.h>
+#include "main.h"
+#include "netlink_dumper.h"
+
+struct ip_devname_ifindex {
+ char devname[64];
+ int ifindex;
+};
+
+struct bpf_netdev_t {
+ struct ip_devname_ifindex *devices;
+ int used_len;
+ int array_len;
+ int filter_idx;
+};
+
+struct tc_kind_handle {
+ char kind[64];
+ int handle;
+};
+
+struct bpf_tcinfo_t {
+ struct tc_kind_handle *handle_array;
+ int used_len;
+ int array_len;
+ bool is_qdisc;
+};
+
+struct bpf_filter_t {
+ const char *kind;
+ const char *devname;
+ int ifindex;
+};
+
+static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+ struct bpf_netdev_t *netinfo = cookie;
+ struct ifinfomsg *ifinfo = msg;
+
+ if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index)
+ return 0;
+
+ if (netinfo->used_len == netinfo->array_len) {
+ netinfo->devices = realloc(netinfo->devices,
+ (netinfo->array_len + 16) *
+ sizeof(struct ip_devname_ifindex));
+ if (!netinfo->devices)
+ return -ENOMEM;
+
+ netinfo->array_len += 16;
+ }
+ netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index;
+ snprintf(netinfo->devices[netinfo->used_len].devname,
+ sizeof(netinfo->devices[netinfo->used_len].devname),
+ "%s",
+ tb[IFLA_IFNAME] ? nla_getattr_str(tb[IFLA_IFNAME]) : "");
+ netinfo->used_len++;
+
+ return do_xdp_dump(ifinfo, tb);
+}
+
+static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+ struct bpf_tcinfo_t *tcinfo = cookie;
+ struct tcmsg *info = msg;
+
+ if (tcinfo->is_qdisc) {
+ /* skip clsact qdisc */
+ if (tb[TCA_KIND] &&
+ strcmp(nla_data(tb[TCA_KIND]), "clsact") == 0)
+ return 0;
+ if (info->tcm_handle == 0)
+ return 0;
+ }
+
+ if (tcinfo->used_len == tcinfo->array_len) {
+ tcinfo->handle_array = realloc(tcinfo->handle_array,
+ (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle));
+ if (!tcinfo->handle_array)
+ return -ENOMEM;
+
+ tcinfo->array_len += 16;
+ }
+ tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle;
+ snprintf(tcinfo->handle_array[tcinfo->used_len].kind,
+ sizeof(tcinfo->handle_array[tcinfo->used_len].kind),
+ "%s",
+ tb[TCA_KIND] ? nla_getattr_str(tb[TCA_KIND]) : "unknown");
+ tcinfo->used_len++;
+
+ return 0;
+}
+
+static int dump_filter_nlmsg(void *cookie, void *msg, struct nlattr **tb)
+{
+ const struct bpf_filter_t *filter_info = cookie;
+
+ return do_filter_dump((struct tcmsg *)msg, tb, filter_info->kind,
+ filter_info->devname, filter_info->ifindex);
+}
+
+static int show_dev_tc_bpf(int sock, unsigned int nl_pid,
+ struct ip_devname_ifindex *dev)
+{
+ struct bpf_filter_t filter_info;
+ struct bpf_tcinfo_t tcinfo;
+ int i, handle, ret = 0;
+
+ tcinfo.handle_array = NULL;
+ tcinfo.used_len = 0;
+ tcinfo.array_len = 0;
+
+ tcinfo.is_qdisc = false;
+ ret = nl_get_class(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg,
+ &tcinfo);
+ if (ret)
+ goto out;
+
+ tcinfo.is_qdisc = true;
+ ret = nl_get_qdisc(sock, nl_pid, dev->ifindex, dump_class_qdisc_nlmsg,
+ &tcinfo);
+ if (ret)
+ goto out;
+
+ filter_info.devname = dev->devname;
+ filter_info.ifindex = dev->ifindex;
+ for (i = 0; i < tcinfo.used_len; i++) {
+ filter_info.kind = tcinfo.handle_array[i].kind;
+ ret = nl_get_filter(sock, nl_pid, dev->ifindex,
+ tcinfo.handle_array[i].handle,
+ dump_filter_nlmsg,
+ &filter_info);
+ if (ret)
+ goto out;
+ }
+
+ /* root, ingress and egress handle */
+ handle = TC_H_ROOT;
+ filter_info.kind = "root";
+ ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
+ if (ret)
+ goto out;
+
+ handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ filter_info.kind = "clsact/ingress";
+ ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
+ if (ret)
+ goto out;
+
+ handle = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
+ filter_info.kind = "clsact/egress";
+ ret = nl_get_filter(sock, nl_pid, dev->ifindex, handle,
+ dump_filter_nlmsg, &filter_info);
+ if (ret)
+ goto out;
+
+out:
+ free(tcinfo.handle_array);
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ int i, sock, ret, filter_idx = -1;
+ struct bpf_netdev_t dev_array;
+ unsigned int nl_pid;
+ char err_buf[256];
+
+ if (argc == 2) {
+ if (strcmp(argv[0], "dev") != 0)
+ usage();
+ filter_idx = if_nametoindex(argv[1]);
+ if (filter_idx == 0) {
+ fprintf(stderr, "invalid dev name %s\n", argv[1]);
+ return -1;
+ }
+ } else if (argc != 0) {
+ usage();
+ }
+
+ sock = bpf_netlink_open(&nl_pid);
+ if (sock < 0) {
+ fprintf(stderr, "failed to open netlink sock\n");
+ return -1;
+ }
+
+ dev_array.devices = NULL;
+ dev_array.used_len = 0;
+ dev_array.array_len = 0;
+ dev_array.filter_idx = filter_idx;
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ NET_START_OBJECT;
+ NET_START_ARRAY("xdp", "%s:\n");
+ ret = nl_get_link(sock, nl_pid, dump_link_nlmsg, &dev_array);
+ NET_END_ARRAY("\n");
+
+ if (!ret) {
+ NET_START_ARRAY("tc", "%s:\n");
+ for (i = 0; i < dev_array.used_len; i++) {
+ ret = show_dev_tc_bpf(sock, nl_pid,
+ &dev_array.devices[i]);
+ if (ret)
+ break;
+ }
+ NET_END_ARRAY("\n");
+ }
+ NET_END_OBJECT;
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ if (ret) {
+ if (json_output)
+ jsonw_null(json_wtr);
+ libbpf_strerror(ret, err_buf, sizeof(err_buf));
+ fprintf(stderr, "Error: %s\n", err_buf);
+ }
+ free(dev_array.devices);
+ close(sock);
+ return ret;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s %s { show | list } [dev <devname>]\n"
+ " %s %s help\n"
+ "Note: Only xdp and tc attachments are supported now.\n"
+ " For progs attached to cgroups, use \"bpftool cgroup\"\n"
+ " to dump program attachments. For program types\n"
+ " sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
+ " consult iproute2.\n",
+ bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_net(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/netlink_dumper.c b/tools/bpf/bpftool/netlink_dumper.c
new file mode 100644
index 000000000000..6f5e9cc6836c
--- /dev/null
+++ b/tools/bpf/bpftool/netlink_dumper.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <libbpf.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_bpf.h>
+
+#include <nlattr.h>
+#include "main.h"
+#include "netlink_dumper.h"
+
+static void xdp_dump_prog_id(struct nlattr **tb, int attr,
+ const char *mode,
+ bool new_json_object)
+{
+ if (!tb[attr])
+ return;
+
+ if (new_json_object)
+ NET_START_OBJECT
+ NET_DUMP_STR("mode", " %s", mode);
+ NET_DUMP_UINT("id", " id %u", nla_getattr_u32(tb[attr]))
+ if (new_json_object)
+ NET_END_OBJECT
+}
+
+static int do_xdp_dump_one(struct nlattr *attr, unsigned int ifindex,
+ const char *name)
+{
+ struct nlattr *tb[IFLA_XDP_MAX + 1];
+ unsigned char mode;
+
+ if (nla_parse_nested(tb, IFLA_XDP_MAX, attr, NULL) < 0)
+ return -1;
+
+ if (!tb[IFLA_XDP_ATTACHED])
+ return 0;
+
+ mode = nla_getattr_u8(tb[IFLA_XDP_ATTACHED]);
+ if (mode == XDP_ATTACHED_NONE)
+ return 0;
+
+ NET_START_OBJECT;
+ if (name)
+ NET_DUMP_STR("devname", "%s", name);
+ NET_DUMP_UINT("ifindex", "(%d)", ifindex);
+
+ if (mode == XDP_ATTACHED_MULTI) {
+ if (json_output) {
+ jsonw_name(json_wtr, "multi_attachments");
+ jsonw_start_array(json_wtr);
+ }
+ xdp_dump_prog_id(tb, IFLA_XDP_SKB_PROG_ID, "generic", true);
+ xdp_dump_prog_id(tb, IFLA_XDP_DRV_PROG_ID, "driver", true);
+ xdp_dump_prog_id(tb, IFLA_XDP_HW_PROG_ID, "offload", true);
+ if (json_output)
+ jsonw_end_array(json_wtr);
+ } else if (mode == XDP_ATTACHED_DRV) {
+ xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "driver", false);
+ } else if (mode == XDP_ATTACHED_SKB) {
+ xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "generic", false);
+ } else if (mode == XDP_ATTACHED_HW) {
+ xdp_dump_prog_id(tb, IFLA_XDP_PROG_ID, "offload", false);
+ }
+
+ NET_END_OBJECT_FINAL;
+ return 0;
+}
+
+int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb)
+{
+ if (!tb[IFLA_XDP])
+ return 0;
+
+ return do_xdp_dump_one(tb[IFLA_XDP], ifinfo->ifi_index,
+ nla_getattr_str(tb[IFLA_IFNAME]));
+}
+
+static int do_bpf_dump_one_act(struct nlattr *attr)
+{
+ struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+
+ if (nla_parse_nested(tb, TCA_ACT_BPF_MAX, attr, NULL) < 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ if (!tb[TCA_ACT_BPF_PARMS])
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ NET_START_OBJECT_NESTED2;
+ if (tb[TCA_ACT_BPF_NAME])
+ NET_DUMP_STR("name", "%s",
+ nla_getattr_str(tb[TCA_ACT_BPF_NAME]));
+ if (tb[TCA_ACT_BPF_ID])
+ NET_DUMP_UINT("id", " id %u",
+ nla_getattr_u32(tb[TCA_ACT_BPF_ID]));
+ NET_END_OBJECT_NESTED;
+ return 0;
+}
+
+static int do_dump_one_act(struct nlattr *attr)
+{
+ struct nlattr *tb[TCA_ACT_MAX + 1];
+
+ if (!attr)
+ return 0;
+
+ if (nla_parse_nested(tb, TCA_ACT_MAX, attr, NULL) < 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ if (tb[TCA_ACT_KIND] && strcmp(nla_data(tb[TCA_ACT_KIND]), "bpf") == 0)
+ return do_bpf_dump_one_act(tb[TCA_ACT_OPTIONS]);
+
+ return 0;
+}
+
+static int do_bpf_act_dump(struct nlattr *attr)
+{
+ struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+ int act, ret;
+
+ if (nla_parse_nested(tb, TCA_ACT_MAX_PRIO, attr, NULL) < 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ NET_START_ARRAY("act", " %s [");
+ for (act = 0; act <= TCA_ACT_MAX_PRIO; act++) {
+ ret = do_dump_one_act(tb[act]);
+ if (ret)
+ break;
+ }
+ NET_END_ARRAY("] ");
+
+ return ret;
+}
+
+static int do_bpf_filter_dump(struct nlattr *attr)
+{
+ struct nlattr *tb[TCA_BPF_MAX + 1];
+ int ret;
+
+ if (nla_parse_nested(tb, TCA_BPF_MAX, attr, NULL) < 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ if (tb[TCA_BPF_NAME])
+ NET_DUMP_STR("name", " %s", nla_getattr_str(tb[TCA_BPF_NAME]));
+ if (tb[TCA_BPF_ID])
+ NET_DUMP_UINT("id", " id %u", nla_getattr_u32(tb[TCA_BPF_ID]));
+ if (tb[TCA_BPF_ACT]) {
+ ret = do_bpf_act_dump(tb[TCA_BPF_ACT]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int do_filter_dump(struct tcmsg *info, struct nlattr **tb, const char *kind,
+ const char *devname, int ifindex)
+{
+ int ret = 0;
+
+ if (tb[TCA_OPTIONS] && strcmp(nla_data(tb[TCA_KIND]), "bpf") == 0) {
+ NET_START_OBJECT;
+ if (devname[0] != '\0')
+ NET_DUMP_STR("devname", "%s", devname);
+ NET_DUMP_UINT("ifindex", "(%u)", ifindex);
+ NET_DUMP_STR("kind", " %s", kind);
+ ret = do_bpf_filter_dump(tb[TCA_OPTIONS]);
+ NET_END_OBJECT_FINAL;
+ }
+
+ return ret;
+}
diff --git a/tools/bpf/bpftool/netlink_dumper.h b/tools/bpf/bpftool/netlink_dumper.h
new file mode 100644
index 000000000000..0788cfbbed0e
--- /dev/null
+++ b/tools/bpf/bpftool/netlink_dumper.h
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+
+#ifndef _NETLINK_DUMPER_H_
+#define _NETLINK_DUMPER_H_
+
+#define NET_START_OBJECT \
+{ \
+ if (json_output) \
+ jsonw_start_object(json_wtr); \
+}
+
+#define NET_START_OBJECT_NESTED(name) \
+{ \
+ if (json_output) { \
+ jsonw_name(json_wtr, name); \
+ jsonw_start_object(json_wtr); \
+ } else { \
+ fprintf(stderr, "%s {", name); \
+ } \
+}
+
+#define NET_START_OBJECT_NESTED2 \
+{ \
+ if (json_output) \
+ jsonw_start_object(json_wtr); \
+ else \
+ fprintf(stderr, "{"); \
+}
+
+#define NET_END_OBJECT_NESTED \
+{ \
+ if (json_output) \
+ jsonw_end_object(json_wtr); \
+ else \
+ fprintf(stderr, "}"); \
+}
+
+#define NET_END_OBJECT \
+{ \
+ if (json_output) \
+ jsonw_end_object(json_wtr); \
+}
+
+#define NET_END_OBJECT_FINAL \
+{ \
+ if (json_output) \
+ jsonw_end_object(json_wtr); \
+ else \
+ fprintf(stderr, "\n"); \
+}
+
+#define NET_START_ARRAY(name, fmt_str) \
+{ \
+ if (json_output) { \
+ jsonw_name(json_wtr, name); \
+ jsonw_start_array(json_wtr); \
+ } else { \
+ fprintf(stderr, fmt_str, name); \
+ } \
+}
+
+#define NET_END_ARRAY(endstr) \
+{ \
+ if (json_output) \
+ jsonw_end_array(json_wtr); \
+ else \
+ fprintf(stderr, "%s", endstr); \
+}
+
+#define NET_DUMP_UINT(name, fmt_str, val) \
+{ \
+ if (json_output) \
+ jsonw_uint_field(json_wtr, name, val); \
+ else \
+ fprintf(stderr, fmt_str, val); \
+}
+
+#define NET_DUMP_STR(name, fmt_str, str) \
+{ \
+ if (json_output) \
+ jsonw_string_field(json_wtr, name, str);\
+ else \
+ fprintf(stderr, fmt_str, str); \
+}
+
+#define NET_DUMP_STR_ONLY(str) \
+{ \
+ if (json_output) \
+ jsonw_string(json_wtr, str); \
+ else \
+ fprintf(stderr, "%s ", str); \
+}
+
+#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index dce960d22106..b1cd3bc8db70 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
};
static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index dbf6e8bd98ba..bbb2a8ef367c 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -286,7 +286,7 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size)
* Found a match; just move the remaining
* entries up.
*/
- if (i == num_records) {
+ if (i == (num_records - 1)) {
kvp_file_info[pool].num_records--;
kvp_update_file(pool);
return 0;
diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h
index 6b0c36a58fcb..e56997288f2b 100644
--- a/tools/include/linux/lockdep.h
+++ b/tools/include/linux/lockdep.h
@@ -30,9 +30,12 @@ struct task_struct {
struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;
int pid;
+ int state;
char comm[17];
};
+#define TASK_RUNNING 0
+
extern struct task_struct *__curr(void);
#define current (__curr())
diff --git a/tools/include/linux/nmi.h b/tools/include/linux/nmi.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/include/linux/nmi.h
diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h
index 664ced8cb1b0..e907ba6f15e5 100644
--- a/tools/include/tools/libc_compat.h
+++ b/tools/include/tools/libc_compat.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: (LGPL-2.0+ OR BSD-2-Clause)
/* Copyright (C) 2018 Netronome Systems, Inc. */
#ifndef __TOOLS_LIBC_COMPAT_H
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 42990676a55e..df4bedb9b01c 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -734,9 +734,11 @@ __SYSCALL(__NR_pkey_free, sys_pkey_free)
__SYSCALL(__NR_statx, sys_statx)
#define __NR_io_pgetevents 292
__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_rseq 293
+__SYSCALL(__NR_rseq, sys_rseq)
#undef __NR_syscalls
-#define __NR_syscalls 293
+#define __NR_syscalls 294
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 9c660e1688ab..300f336633f2 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -687,6 +687,15 @@ struct drm_get_cap {
*/
#define DRM_CLIENT_CAP_ASPECT_RATIO 4
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. Depends on client
+ * also supporting DRM_CLIENT_CAP_ATOMIC
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
+
/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
struct drm_set_client_cap {
__u64 capability;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 66917a4eba27..aa5ccd2385ed 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
};
enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
BPF_CGROUP_UDP4_SENDMSG,
BPF_CGROUP_UDP6_SENDMSG,
BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
__MAX_BPF_ATTACH_TYPE
};
@@ -2333,6 +2335,7 @@ struct __sk_buff {
/* ... here. */
__u32 data_meta;
+ struct bpf_flow_keys *flow_keys;
};
struct bpf_tunnel_key {
@@ -2778,4 +2781,27 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
+struct bpf_flow_keys {
+ __u16 nhoff;
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __u8 ip_proto;
+ __be16 n_proto;
+ __be16 sport;
+ __be16 dport;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 141cbfdc5865..58faab897201 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -165,6 +165,8 @@ enum {
IFLA_CARRIER_UP_COUNT,
IFLA_CARRIER_DOWN_COUNT,
IFLA_NEW_IFINDEX,
+ IFLA_MIN_MTU,
+ IFLA_MAX_MTU,
__IFLA_MAX
};
@@ -335,6 +337,7 @@ enum {
IFLA_BRPORT_GROUP_FWD_MASK,
IFLA_BRPORT_NEIGH_SUPPRESS,
IFLA_BRPORT_ISOLATED,
+ IFLA_BRPORT_BACKUP_PORT,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -460,6 +463,16 @@ enum {
#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+/* XFRM section */
+enum {
+ IFLA_XFRM_UNSPEC,
+ IFLA_XFRM_LINK,
+ IFLA_XFRM_IF_ID,
+ __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
enum macsec_validation_type {
MACSEC_VALIDATE_DISABLED = 0,
MACSEC_VALIDATE_CHECK = 1,
@@ -922,6 +935,7 @@ enum {
XDP_ATTACHED_DRV,
XDP_ATTACHED_SKB,
XDP_ATTACHED_HW,
+ XDP_ATTACHED_MULTI,
};
enum {
@@ -930,6 +944,9 @@ enum {
IFLA_XDP_ATTACHED,
IFLA_XDP_FLAGS,
IFLA_XDP_PROG_ID,
+ IFLA_XDP_DRV_PROG_ID,
+ IFLA_XDP_SKB_PROG_ID,
+ IFLA_XDP_HW_PROG_ID,
__IFLA_XDP_MAX,
};
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index b6270a3b38e9..07548de5c988 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -949,6 +949,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_GET_MSR_FEATURES 153
#define KVM_CAP_HYPERV_EVENTFD 154
#define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_S390_HPAGE_1M 156
+#define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1391,6 +1394,9 @@ struct kvm_enc_region {
/* Available with KVM_CAP_HYPERV_EVENTFD */
#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd)
+/* Available with KVM_CAP_NESTED_STATE */
+#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
+#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index eeb787b1c53c..f35eb72739c0 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -144,7 +144,7 @@ enum perf_event_sample_format {
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
- __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63,
+ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
/*
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index c51f8e5cc608..84c3de89696a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -65,6 +65,7 @@ struct vhost_iotlb_msg {
};
#define VHOST_IOTLB_MSG 0x1
+#define VHOST_IOTLB_MSG_V2 0x2
struct vhost_msg {
int type;
@@ -74,6 +75,15 @@ struct vhost_msg {
};
};
+struct vhost_msg_v2 {
+ __u32 type;
+ __u32 reserved;
+ union {
+ struct vhost_iotlb_msg iotlb;
+ __u8 padding[64];
+ };
+};
+
struct vhost_memory_region {
__u64 guest_phys_addr;
__u64 memory_size; /* bytes */
@@ -160,6 +170,14 @@ struct vhost_memory {
#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \
struct vhost_vring_state)
+/* Set or get vhost backend capability */
+
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+
+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
+
/* VHOST_NET specific defines */
/* Attach virtio net ring to a raw socket, or tap device.
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 439b8a27488d..195ba486640f 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1325,7 +1325,7 @@ class Tui(object):
msg = ''
while True:
self.screen.erase()
- self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' %
+ self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
DELAY_DEFAULT, curses.A_BOLD)
self.screen.addstr(4, 0, msg)
self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 13a861135127..7bc31c905018 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 60aa4ca8b2c5..3878a26a2071 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -28,16 +28,8 @@
#include <linux/bpf.h>
#include "bpf.h"
#include "libbpf.h"
-#include "nlattr.h"
-#include <linux/rtnetlink.h>
-#include <linux/if_link.h>
-#include <sys/socket.h>
#include <errno.h>
-#ifndef SOL_NETLINK
-#define SOL_NETLINK 270
-#endif
-
/*
* When building perf, unistd.h is overridden. __NR_bpf is
* required to be defined explicitly.
@@ -499,127 +491,6 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)
return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
- struct sockaddr_nl sa;
- int sock, seq = 0, len, ret = -1;
- char buf[4096];
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
- struct nlmsghdr *nh;
- struct nlmsgerr *err;
- socklen_t addrlen;
- int one = 1;
-
- memset(&sa, 0, sizeof(sa));
- sa.nl_family = AF_NETLINK;
-
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (sock < 0) {
- return -errno;
- }
-
- if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
- &one, sizeof(one)) < 0) {
- fprintf(stderr, "Netlink error reporting not supported\n");
- }
-
- if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- ret = -errno;
- goto cleanup;
- }
-
- addrlen = sizeof(sa);
- if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
- ret = -errno;
- goto cleanup;
- }
-
- if (addrlen != sizeof(sa)) {
- ret = -LIBBPF_ERRNO__INTERNAL;
- goto cleanup;
- }
-
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
- req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
-
- /* started nested attribute for XDP */
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | IFLA_XDP;
- nla->nla_len = NLA_HDRLEN;
-
- /* add XDP fd */
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FD;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len += nla_xdp->nla_len;
-
- /* if user passed in any flags, add those too */
- if (flags) {
- nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
- nla_xdp->nla_type = IFLA_XDP_FLAGS;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
- nla->nla_len += nla_xdp->nla_len;
- }
-
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- ret = -errno;
- goto cleanup;
- }
-
- len = recv(sock, buf, sizeof(buf), 0);
- if (len < 0) {
- ret = -errno;
- goto cleanup;
- }
-
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
- nh = NLMSG_NEXT(nh, len)) {
- if (nh->nlmsg_pid != sa.nl_pid) {
- ret = -LIBBPF_ERRNO__WRNGPID;
- goto cleanup;
- }
- if (nh->nlmsg_seq != seq) {
- ret = -LIBBPF_ERRNO__INVSEQ;
- goto cleanup;
- }
- switch (nh->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(nh);
- if (!err->error)
- continue;
- ret = err->error;
- nla_dump_errormsg(nh);
- goto cleanup;
- case NLMSG_DONE:
- break;
- default:
- break;
- }
- }
-
- ret = 0;
-
-cleanup:
- close(sock);
- return ret;
-}
-
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2abd0f112627..4f8d43ae20d2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -50,6 +50,7 @@
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
+#include "str_error.h"
#ifndef EM_BPF
#define EM_BPF 247
@@ -469,7 +470,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
obj->efile.fd = open(obj->path, O_RDONLY);
if (obj->efile.fd < 0) {
char errmsg[STRERR_BUFSIZE];
- char *cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ char *cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to open %s: %s\n", obj->path, cp);
return -errno;
@@ -810,8 +811,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
data->d_size, name, idx);
if (err) {
char errmsg[STRERR_BUFSIZE];
- char *cp = strerror_r(-err, errmsg,
- sizeof(errmsg));
+ char *cp = str_error(-err, errmsg, sizeof(errmsg));
pr_warning("failed to alloc program %s (%s): %s",
name, obj->path, cp);
@@ -1140,7 +1140,7 @@ bpf_object__create_maps(struct bpf_object *obj)
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_type_id) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, errno);
create_attr.btf_fd = 0;
@@ -1155,7 +1155,7 @@ bpf_object__create_maps(struct bpf_object *obj)
size_t j;
err = *pfd;
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to create map (name: '%s'): %s\n",
map->name, cp);
for (j = 0; j < i; j++)
@@ -1339,7 +1339,7 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
}
ret = -LIBBPF_ERRNO__LOAD;
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("load bpf program failed: %s\n", cp);
if (log_buf && log_buf[0] != '\0') {
@@ -1502,6 +1502,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_LIRC_MODE2:
case BPF_PROG_TYPE_SK_REUSEPORT:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
return false;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_KPROBE:
@@ -1654,7 +1655,7 @@ static int check_path(const char *path)
dir = dirname(dname);
if (statfs(dir, &st_fs)) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to statfs %s: %s\n", dir, cp);
err = -errno;
}
@@ -1690,7 +1691,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin program: %s\n", cp);
return -errno;
}
@@ -1708,7 +1709,7 @@ static int make_dir(const char *path)
err = -errno;
if (err) {
- cp = strerror_r(-err, errmsg, sizeof(errmsg));
+ cp = str_error(-err, errmsg, sizeof(errmsg));
pr_warning("failed to mkdir %s: %s\n", path, cp);
}
return err;
@@ -1770,7 +1771,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
}
if (bpf_obj_pin(map->fd, path)) {
- cp = strerror_r(errno, errmsg, sizeof(errmsg));
+ cp = str_error(errno, errmsg, sizeof(errmsg));
pr_warning("failed to pin map: %s\n", cp);
return -errno;
}
@@ -2121,6 +2122,7 @@ static const struct {
BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2),
+ BPF_PROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR),
BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
@@ -2336,7 +2338,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
bpf_program__set_expected_attach_type(prog,
expected_attach_type);
- if (!bpf_program__is_function_storage(prog, obj) && !first_prog)
+ if (!first_prog)
first_prog = prog;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 96c55fac54c3..e3b00e23e181 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -46,6 +46,7 @@ enum libbpf_errno {
LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */
LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */
LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */
+ LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */
__LIBBPF_ERRNO__END,
};
@@ -297,4 +298,19 @@ int bpf_perf_event_read_simple(void *mem, unsigned long size,
unsigned long page_size,
void **buf, size_t *buf_len,
bpf_perf_event_print_t fn, void *priv);
+
+struct nlmsghdr;
+struct nlattr;
+typedef int (*dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, dump_nlmsg_t,
+ void *cookie);
+int bpf_netlink_open(unsigned int *nl_pid);
+int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg,
+ void *cookie);
+int nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_class_nlmsg, void *cookie);
+int nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
+int nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ dump_nlmsg_t dump_filter_nlmsg, void *cookie);
#endif
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
index d9ba851bd7f9..2464ade3b326 100644
--- a/tools/lib/bpf/libbpf_errno.c
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -42,6 +42,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
[ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
[ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
[ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
+ [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing",
};
int libbpf_strerror(int err, char *buf, size_t size)
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
new file mode 100644
index 000000000000..fde1d7bf8199
--- /dev/null
+++ b/tools/lib/bpf/netlink.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: LGPL-2.1
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <time.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "nlattr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+int bpf_netlink_open(__u32 *nl_pid)
+{
+ struct sockaddr_nl sa;
+ socklen_t addrlen;
+ int one = 1, ret;
+ int sock;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0)
+ return -errno;
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ fprintf(stderr, "Netlink error reporting not supported\n");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ *nl_pid = sa.nl_pid;
+ return sock;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+ __dump_nlmsg_t _fn, dump_nlmsg_t fn,
+ void *cookie)
+{
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ char buf[4096];
+ int len, ret;
+
+ while (multipart) {
+ multipart = false;
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto done;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto done;
+ }
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+ return 0;
+ default:
+ break;
+ }
+ if (_fn) {
+ ret = _fn(nh, fn, cookie);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ ret = 0;
+done:
+ return ret;
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ int sock, seq = 0, ret;
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ __u32 nl_pid;
+
+ sock = bpf_netlink_open(&nl_pid);
+ if (sock < 0)
+ return sock;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* started nested attribute for XDP */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FLAGS;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+ ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh, dump_nlmsg_t dump_link_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[IFLA_MAX + 1], *attr;
+ struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+ attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+ if (nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+int nl_get_link(int sock, unsigned int nl_pid, dump_nlmsg_t dump_link_nlmsg,
+ void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nlh.nlmsg_type = RTM_GETLINK,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifm.ifi_family = AF_PACKET,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+ dump_link_nlmsg, cookie);
+}
+
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_class_nlmsg(cookie, t, tb);
+}
+
+int nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTCLASS,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+ dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+int nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETQDISC,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+ dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+ dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_filter_nlmsg(cookie, t, tb);
+}
+
+int nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTFILTER,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ .t.tcm_parent = handle,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+ dump_filter_nlmsg, cookie);
+}
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 4719434278b2..49f514119bdb 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -26,11 +26,6 @@ static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = {
[NLA_FLAG] = 0,
};
-static int nla_len(const struct nlattr *nla)
-{
- return nla->nla_len - NLA_HDRLEN;
-}
-
static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
{
int totlen = NLA_ALIGN(nla->nla_len);
@@ -46,11 +41,6 @@ static int nla_ok(const struct nlattr *nla, int remaining)
nla->nla_len <= remaining;
}
-static void *nla_data(const struct nlattr *nla)
-{
- return (char *) nla + NLA_HDRLEN;
-}
-
static int nla_type(const struct nlattr *nla)
{
return nla->nla_type & NLA_TYPE_MASK;
@@ -114,8 +104,8 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh)
* @see nla_validate
* @return 0 on success or a negative error code.
*/
-static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
- struct nla_policy *policy)
+int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+ struct nla_policy *policy)
{
struct nlattr *nla;
int rem, err;
@@ -146,6 +136,25 @@ errout:
return err;
}
+/**
+ * Create attribute index based on nested attribute
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg nla Nested Attribute.
+ * @arg policy Attribute validation policy.
+ *
+ * Feeds the stream of attributes nested into the specified attribute
+ * to nla_parse().
+ *
+ * @see nla_parse
+ * @return 0 on success or a negative error code.
+ */
+int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ struct nla_policy *policy)
+{
+ return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
+}
+
/* dump netlink extended ack error message */
int nla_dump_errormsg(struct nlmsghdr *nlh)
{
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
index 931a71f68f93..a6e2396bce7c 100644
--- a/tools/lib/bpf/nlattr.h
+++ b/tools/lib/bpf/nlattr.h
@@ -67,6 +67,44 @@ struct nla_policy {
nla_ok(pos, rem); \
pos = nla_next(pos, &(rem)))
+/**
+ * nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *nla_data(const struct nlattr *nla)
+{
+ return (char *) nla + NLA_HDRLEN;
+}
+
+static inline uint8_t nla_getattr_u8(const struct nlattr *nla)
+{
+ return *(uint8_t *)nla_data(nla);
+}
+
+static inline uint32_t nla_getattr_u32(const struct nlattr *nla)
+{
+ return *(uint32_t *)nla_data(nla);
+}
+
+static inline const char *nla_getattr_str(const struct nlattr *nla)
+{
+ return (const char *)nla_data(nla);
+}
+
+/**
+ * nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int nla_len(const struct nlattr *nla)
+{
+ return nla->nla_len - NLA_HDRLEN;
+}
+
+int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+ struct nla_policy *policy);
+int nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ struct nla_policy *policy);
+
int nla_dump_errormsg(struct nlmsghdr *nlh);
#endif /* __NLATTR_H */
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
new file mode 100644
index 000000000000..b8798114a357
--- /dev/null
+++ b/tools/lib/bpf/str_error.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: LGPL-2.1
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include "str_error.h"
+
+/*
+ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
+ * libc, while checking strerror_r() return to avoid having to check this in
+ * all places calling it.
+ */
+char *str_error(int err, char *dst, int len)
+{
+ int ret = strerror_r(err, dst, len);
+ if (ret)
+ snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+ return dst;
+}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
new file mode 100644
index 000000000000..355b1db571d1
--- /dev/null
+++ b/tools/lib/bpf/str_error.h
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
+#ifndef BPF_STR_ERROR
+#define BPF_STR_ERROR
+
+char *str_error(int err, char *dst, int len);
+#endif // BPF_STR_ERROR
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 42261a9b280e..ac841bc5c35b 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -280,7 +280,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
mv $@+ $@
ifdef USE_ASCIIDOCTOR
-$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b manpage -d manpage \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index b3d1b12a5081..5224ade3d5af 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -777,14 +777,12 @@ endif
$(call QUIET_INSTALL, libexec) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifndef NO_LIBBPF
- $(call QUIET_INSTALL, lib) \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
- $(call QUIET_INSTALL, include/bpf) \
- $(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
- $(call QUIET_INSTALL, lib) \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
- $(call QUIET_INSTALL, examples/bpf) \
- $(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+ $(call QUIET_INSTALL, bpf-headers) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+ $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+ $(call QUIET_INSTALL, bpf-examples) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
+ $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
endif
$(call QUIET_INSTALL, perf-archive) \
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index f013b115dc86..dbef716a1913 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -11,7 +11,8 @@ PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
out := $(OUTPUT)arch/arm64/include/generated/asm
header := $(out)/syscalls.c
-sysdef := $(srctree)/tools/include/uapi/asm-generic/unistd.h
+incpath := $(srctree)/tools
+sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h
sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/
systbl := $(sysprf)/mksyscalltbl
@@ -19,7 +20,7 @@ systbl := $(sysprf)/mksyscalltbl
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
$(header): $(sysdef) $(systbl)
- $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(sysdef) > $@
+ $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
clean::
$(call QUIET_CLEAN, arm64) $(RM) $(header)
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 52e197317d3e..2dbb8cade048 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -11,7 +11,8 @@
gcc=$1
hostcc=$2
-input=$3
+incpath=$3
+input=$4
if ! test -r $input; then
echo "Could not read input file" >&2
@@ -28,7 +29,6 @@ create_table_from_c()
cat <<-_EoHEADER
#include <stdio.h>
- #define __ARCH_WANT_RENAMEAT
#include "$input"
int main(int argc, char *argv[])
{
@@ -42,7 +42,7 @@ create_table_from_c()
printf "%s\n" " printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);"
printf "}\n"
- } | $hostcc -o $create_table_exe -x c -
+ } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
$create_table_exe
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 20e7d74d86cd..10a44e946f77 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
#endif
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
int arch__choose_best_symbol(struct symbol *syma,
struct symbol *symb __maybe_unused)
{
char *sym = syma->name;
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
/* Skip over any initial dot */
if (*sym == '.')
sym++;
+#endif
/* Avoid "SyS" kernel syscall aliases */
if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
@@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma,
return SYMBOL_A;
}
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
/* Allow matching against dot variants */
int arch__compare_symbol_names(const char *namea, const char *nameb)
{
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index c1bd979b957b..613709cfbbd0 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -9,6 +9,7 @@ struct test;
int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
+int test__bp_modify(struct test *test, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 8e2c5a38c3b9..586849ff83a0 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -5,3 +5,4 @@ libperf-y += arch-tests.o
libperf-y += rdpmc.o
libperf-y += perf-time-to-tsc.o
libperf-$(CONFIG_AUXTRACE) += insn-x86.o
+libperf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index cc1802ff5410..d47d3f8e3c8e 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -24,6 +24,12 @@ struct test arch_tests[] = {
.func = test__insn_x86,
},
#endif
+#if defined(__x86_64__)
+ {
+ .desc = "x86 bp modify",
+ .func = test__bp_modify,
+ },
+#endif
{
.func = NULL,
},
diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c
new file mode 100644
index 000000000000..f53e4406709f
--- /dev/null
+++ b/tools/perf/arch/x86/tests/bp-modify.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <asm/ptrace.h>
+#include <errno.h>
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+static noinline int bp_1(void)
+{
+ pr_debug("in %s\n", __func__);
+ return 0;
+}
+
+static noinline int bp_2(void)
+{
+ pr_debug("in %s\n", __func__);
+ return 0;
+}
+
+static int spawn_child(void)
+{
+ int child = fork();
+
+ if (child == 0) {
+ /*
+ * The child sets itself for as tracee and
+ * waits in signal for parent to trace it,
+ * then it calls bp_1 and quits.
+ */
+ int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+
+ if (err) {
+ pr_debug("failed to PTRACE_TRACEME\n");
+ exit(1);
+ }
+
+ raise(SIGCONT);
+ bp_1();
+ exit(0);
+ }
+
+ return child;
+}
+
+/*
+ * This tests creates HW breakpoint, tries to
+ * change it and checks it was properly changed.
+ */
+static int bp_modify1(void)
+{
+ pid_t child;
+ int status;
+ unsigned long rip = 0, dr7 = 1;
+
+ child = spawn_child();
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 1\n");
+ return TEST_FAIL;
+ }
+
+ /*
+ * The parent does following steps:
+ * - creates a new breakpoint (id 0) for bp_2 function
+ * - changes that breakponit to bp_1 function
+ * - waits for the breakpoint to hit and checks
+ * it has proper rip of bp_1 function
+ * - detaches the child
+ */
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), bp_2)) {
+ pr_debug("failed to set breakpoint, 1st time: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), bp_1)) {
+ pr_debug("failed to set breakpoint, 2nd time: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[7]), dr7)) {
+ pr_debug("failed to set dr7: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+ pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+ goto out;
+ }
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 2\n");
+ return TEST_FAIL;
+ }
+
+ rip = ptrace(PTRACE_PEEKUSER, child,
+ offsetof(struct user_regs_struct, rip), NULL);
+ if (rip == (unsigned long) -1) {
+ pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+ if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+ pr_debug("failed to PTRACE_DETACH: %s", strerror(errno));
+ return TEST_FAIL;
+ }
+
+ return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This tests creates HW breakpoint, tries to
+ * change it to bogus value and checks the original
+ * breakpoint is hit.
+ */
+static int bp_modify2(void)
+{
+ pid_t child;
+ int status;
+ unsigned long rip = 0, dr7 = 1;
+
+ child = spawn_child();
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 1\n");
+ return TEST_FAIL;
+ }
+
+ /*
+ * The parent does following steps:
+ * - creates a new breakpoint (id 0) for bp_1 function
+ * - tries to change that breakpoint to (-1) address
+ * - waits for the breakpoint to hit and checks
+ * it has proper rip of bp_1 function
+ * - detaches the child
+ */
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), bp_1)) {
+ pr_debug("failed to set breakpoint: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[7]), dr7)) {
+ pr_debug("failed to set dr7: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (!ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) {
+ pr_debug("failed, breakpoint set to bogus address\n");
+ goto out;
+ }
+
+ if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+ pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+ goto out;
+ }
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 2\n");
+ return TEST_FAIL;
+ }
+
+ rip = ptrace(PTRACE_PEEKUSER, child,
+ offsetof(struct user_regs_struct, rip), NULL);
+ if (rip == (unsigned long) -1) {
+ pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+ if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+ pr_debug("failed to PTRACE_DETACH: %s", strerror(errno));
+ return TEST_FAIL;
+ }
+
+ return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+int test__bp_modify(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
+ TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2());
+
+ return 0;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 20061cf42288..28cd6a17491b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -246,8 +246,14 @@ find_target:
indirect_call:
tok = strchr(endptr, '*');
- if (tok != NULL)
- ops->target.addr = strtoull(tok + 1, NULL, 16);
+ if (tok != NULL) {
+ endptr++;
+
+ /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
+ * Do not parse such instruction. */
+ if (strstr(endptr, "(%r") == NULL)
+ ops->target.addr = strtoull(endptr, NULL, 16);
+ }
goto find_target;
}
@@ -276,7 +282,19 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops || ins->ops == &s390_call_ops;
}
-static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms)
+/*
+ * Prevents from matching commas in the comment section, e.g.:
+ * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+ if (ops->raw_comment && c > ops->raw_comment)
+ return NULL;
+
+ return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
{
struct map *map = ms->map;
struct symbol *sym = ms->sym;
@@ -285,6 +303,10 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
};
const char *c = strchr(ops->raw, ',');
u64 start, end;
+
+ ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+ c = validate_comma(c, ops);
+
/*
* Examples of lines to parse for the _cpp_lex_token@@Base
* function:
@@ -304,6 +326,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op
ops->target.addr = strtoull(c, NULL, 16);
if (!ops->target.addr) {
c = strchr(c, ',');
+ c = validate_comma(c, ops);
if (c++ != NULL)
ops->target.addr = strtoull(c, NULL, 16);
}
@@ -361,9 +384,12 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
c = strchr(ops->raw, ',');
+ c = validate_comma(c, ops);
+
if (c != NULL) {
const char *c2 = strchr(c + 1, ',');
+ c2 = validate_comma(c2, ops);
/* check for 3-op insn */
if (c2 != NULL)
c = c2;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 005a5fe8a8c6..5399ba2321bb 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -22,6 +22,7 @@ struct ins {
struct ins_operands {
char *raw;
+ char *raw_comment;
struct {
char *raw;
char *name;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c980bbff6353..1a61628a1c12 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -251,8 +251,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
- if (evsel != NULL)
- perf_evsel__init(evsel, attr, idx);
+ if (!evsel)
+ return NULL;
+ perf_evsel__init(evsel, attr, idx);
if (perf_evsel__is_bpf_output(evsel)) {
evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 36d0763311ef..6a6929f208b4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -576,6 +576,13 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg,
return NULL;
}
+static bool map__contains_symbol(struct map *map, struct symbol *sym)
+{
+ u64 ip = map->unmap_ip(map, sym->start);
+
+ return ip >= map->start && ip < map->end;
+}
+
struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
struct map **mapp)
{
@@ -591,6 +598,10 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
if (sym == NULL)
continue;
+ if (!map__contains_symbol(pos, sym)) {
+ sym = NULL;
+ continue;
+ }
if (mapp != NULL)
*mapp = pos;
goto out;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index c85d0d1a65ed..7b0ca7cbb7de 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -377,7 +377,7 @@ out:
static int record_saved_cmdline(void)
{
- unsigned int size;
+ unsigned long long size;
char *path;
struct stat st;
int ret, err = 0;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 920b1d58a068..e76214f8d596 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -164,16 +164,15 @@ void parse_ftrace_printk(struct tep_handle *pevent,
void parse_saved_cmdline(struct tep_handle *pevent,
char *file, unsigned int size __maybe_unused)
{
- char *comm;
+ char comm[17]; /* Max comm length in the kernel is 16. */
char *line;
char *next = NULL;
int pid;
line = strtok_r(file, "\n", &next);
while (line) {
- sscanf(line, "%d %ms", &pid, &comm);
- tep_register_comm(pevent, comm, pid);
- free(comm);
+ if (sscanf(line, "%d %16s", &pid, comm) == 2)
+ tep_register_comm(pevent, comm, pid);
line = strtok_r(NULL, "\n", &next);
}
}
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index 72c25a3cb658..d9a725478375 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -6,7 +6,7 @@ TEST_PROGS := run.sh
include ../lib.mk
-all:
+all: khdr
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/config
index b4ad748a9dd9..b4ad748a9dd9 100644
--- a/tools/testing/selftests/android/ion/config
+++ b/tools/testing/selftests/android/config
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index e03695287f76..88cfe88e466f 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -10,6 +10,8 @@ $(TEST_GEN_FILES): ipcsocket.c ionutils.c
TEST_PROGS := ion_test.sh
+KSFT_KHDR_INSTALL := 1
+top_srcdir = ../../../../..
include ../../lib.mk
$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 49938d72cf63..8a60c9b9892d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -19,3 +19,9 @@ test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
+test_skb_cgroup_id_user
+test_socket_cookie
+test_cgroup_storage
+test_select_reuseport
+test_flow_dissector
+flow_dissector_load
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..fd3851d5c079 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
- test_skb_cgroup_id_kern.o
+ test_skb_cgroup_id_kern.o bpf_flow.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -47,10 +47,12 @@ TEST_PROGS := test_kmod.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_skb_cgroup_id.sh
+ test_skb_cgroup_id.sh \
+ test_flow_dissector.sh
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \
+ flow_dissector_load test_flow_dissector
include ../lib.mk
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
new file mode 100644
index 000000000000..107350a7821d
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls. Name is limited to 16 characters, with the terminating character and
+ * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
+ */
+enum {
+ IP,
+ IPV6,
+ IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
+ IPV6FR, /* Fragmentation IPv6 Extension Header */
+ MPLS,
+ VLAN,
+};
+
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+#define IP6_MF 0x0001
+#define IP6_OFFSET 0xFFF8
+
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+struct gre_hdr {
+ __be16 flags;
+ __be16 proto;
+};
+
+struct frag_hdr {
+ __u8 nexthdr;
+ __u8 reserved;
+ __be16 frag_off;
+ __be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 8
+};
+
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+ __u16 hdr_size,
+ void *buffer)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ __u16 nhoff = skb->flow_keys->nhoff;
+ __u8 *hdr;
+
+ /* Verifies this variable offset does not overflow */
+ if (nhoff > (USHRT_MAX - hdr_size))
+ return NULL;
+
+ hdr = data + nhoff;
+ if (hdr + hdr_size <= data_end)
+ return hdr;
+
+ if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size))
+ return NULL;
+
+ return buffer;
+}
+
+/* Dispatches on ETHERTYPE */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ keys->n_proto = proto;
+ switch (proto) {
+ case bpf_htons(ETH_P_IP):
+ bpf_tail_call(skb, &jmp_table, IP);
+ break;
+ case bpf_htons(ETH_P_IPV6):
+ bpf_tail_call(skb, &jmp_table, IPV6);
+ break;
+ case bpf_htons(ETH_P_MPLS_MC):
+ case bpf_htons(ETH_P_MPLS_UC):
+ bpf_tail_call(skb, &jmp_table, MPLS);
+ break;
+ case bpf_htons(ETH_P_8021Q):
+ case bpf_htons(ETH_P_8021AD):
+ bpf_tail_call(skb, &jmp_table, VLAN);
+ break;
+ default:
+ /* Protocol not supported */
+ return BPF_DROP;
+ }
+
+ return BPF_DROP;
+}
+
+SEC("dissect")
+int _dissect(struct __sk_buff *skb)
+{
+ if (!skb->vlan_present)
+ return parse_eth_proto(skb, skb->protocol);
+ else
+ return parse_eth_proto(skb, skb->vlan_proto);
+}
+
+/* Parses on IPPROTO_* */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ void *data_end = (void *)(long)skb->data_end;
+ struct icmphdr *icmp, _icmp;
+ struct gre_hdr *gre, _gre;
+ struct ethhdr *eth, _eth;
+ struct tcphdr *tcp, _tcp;
+ struct udphdr *udp, _udp;
+
+ keys->ip_proto = proto;
+ switch (proto) {
+ case IPPROTO_ICMP:
+ icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+ if (!icmp)
+ return BPF_DROP;
+ return BPF_OK;
+ case IPPROTO_IPIP:
+ keys->is_encap = true;
+ return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+ case IPPROTO_IPV6:
+ keys->is_encap = true;
+ return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+ case IPPROTO_GRE:
+ gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+ if (!gre)
+ return BPF_DROP;
+
+ if (bpf_htons(gre->flags & GRE_VERSION))
+ /* Only inspect standard GRE packets with version 0 */
+ return BPF_OK;
+
+ keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+ if (GRE_IS_CSUM(gre->flags))
+ keys->nhoff += 4; /* Step over chksum and Padding */
+ if (GRE_IS_KEY(gre->flags))
+ keys->nhoff += 4; /* Step over key */
+ if (GRE_IS_SEQ(gre->flags))
+ keys->nhoff += 4; /* Step over sequence number */
+
+ keys->is_encap = true;
+
+ if (gre->proto == bpf_htons(ETH_P_TEB)) {
+ eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+ &_eth);
+ if (!eth)
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*eth);
+
+ return parse_eth_proto(skb, eth->h_proto);
+ } else {
+ return parse_eth_proto(skb, gre->proto);
+ }
+ case IPPROTO_TCP:
+ tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+ if (!tcp)
+ return BPF_DROP;
+
+ if (tcp->doff < 5)
+ return BPF_DROP;
+
+ if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
+ return BPF_DROP;
+
+ keys->thoff = keys->nhoff;
+ keys->sport = tcp->source;
+ keys->dport = tcp->dest;
+ return BPF_OK;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+ if (!udp)
+ return BPF_DROP;
+
+ keys->thoff = keys->nhoff;
+ keys->sport = udp->source;
+ keys->dport = udp->dest;
+ return BPF_OK;
+ default:
+ return BPF_DROP;
+ }
+
+ return BPF_DROP;
+}
+
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+
+ keys->ip_proto = nexthdr;
+ switch (nexthdr) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_DSTOPTS:
+ bpf_tail_call(skb, &jmp_table, IPV6OP);
+ break;
+ case IPPROTO_FRAGMENT:
+ bpf_tail_call(skb, &jmp_table, IPV6FR);
+ break;
+ default:
+ return parse_ip_proto(skb, nexthdr);
+ }
+
+ return BPF_DROP;
+}
+
+PROG(IP)(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph, _iph;
+ bool done = false;
+
+ iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+ if (!iph)
+ return BPF_DROP;
+
+ /* IP header cannot be smaller than 20 bytes */
+ if (iph->ihl < 5)
+ return BPF_DROP;
+
+ keys->addr_proto = ETH_P_IP;
+ keys->ipv4_src = iph->saddr;
+ keys->ipv4_dst = iph->daddr;
+
+ keys->nhoff += iph->ihl << 2;
+ if (data + keys->nhoff > data_end)
+ return BPF_DROP;
+
+ if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+ keys->is_frag = true;
+ if (iph->frag_off & bpf_htons(IP_OFFSET))
+ /* From second fragment on, packets do not have headers
+ * we can parse.
+ */
+ done = true;
+ else
+ keys->is_first_frag = true;
+ }
+
+ if (done)
+ return BPF_OK;
+
+ return parse_ip_proto(skb, iph->protocol);
+}
+
+PROG(IPV6)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct ipv6hdr *ip6h, _ip6h;
+
+ ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return BPF_DROP;
+
+ keys->addr_proto = ETH_P_IPV6;
+ memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
+
+ keys->nhoff += sizeof(struct ipv6hdr);
+
+ return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+ struct ipv6_opt_hdr *ip6h, _ip6h;
+
+ ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
+ if (!ip6h)
+ return BPF_DROP;
+
+ /* hlen is in 8-octets and does not include the first 8 bytes
+ * of the header
+ */
+ skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3;
+
+ return parse_ipv6_proto(skb, ip6h->nexthdr);
+}
+
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct frag_hdr *fragh, _fragh;
+
+ fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
+ if (!fragh)
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*fragh);
+ keys->is_frag = true;
+ if (!(fragh->frag_off & bpf_htons(IP6_OFFSET)))
+ keys->is_first_frag = true;
+
+ return parse_ipv6_proto(skb, fragh->nexthdr);
+}
+
+PROG(MPLS)(struct __sk_buff *skb)
+{
+ struct mpls_label *mpls, _mpls;
+
+ mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
+ if (!mpls)
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+PROG(VLAN)(struct __sk_buff *skb)
+{
+ struct bpf_flow_keys *keys = skb->flow_keys;
+ struct vlan_hdr *vlan, _vlan;
+ __be16 proto;
+
+ /* Peek back to see if single or double-tagging */
+ if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto,
+ sizeof(proto)))
+ return BPF_DROP;
+
+ /* Account for double-tagging */
+ if (proto == bpf_htons(ETH_P_8021AD)) {
+ vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+ if (!vlan)
+ return BPF_DROP;
+
+ if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*vlan);
+ }
+
+ vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
+ if (!vlan)
+ return BPF_DROP;
+
+ keys->nhoff += sizeof(*vlan);
+ /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
+ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+ vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+ return BPF_DROP;
+
+ return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index b4994a94968b..3655508f95fd 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -18,3 +18,4 @@ CONFIG_CRYPTO_HMAC=m
CONFIG_CRYPTO_SHA256=m
CONFIG_VXLAN=y
CONFIG_GENEVE=y
+CONFIG_NET_CLS_FLOWER=m
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
new file mode 100644
index 000000000000..d3273b5b3173
--- /dev/null
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
+const char *cfg_map_name = "jmp_table";
+bool cfg_attach = true;
+char *cfg_section_name;
+char *cfg_path_name;
+
+static void load_and_attach_program(void)
+{
+ struct bpf_program *prog, *main_prog;
+ struct bpf_map *prog_array;
+ int i, fd, prog_fd, ret;
+ struct bpf_object *obj;
+ int prog_array_fd;
+
+ ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj,
+ &prog_fd);
+ if (ret)
+ error(1, 0, "bpf_prog_load %s", cfg_path_name);
+
+ main_prog = bpf_object__find_program_by_title(obj, cfg_section_name);
+ if (!main_prog)
+ error(1, 0, "bpf_object__find_program_by_title %s",
+ cfg_section_name);
+
+ prog_fd = bpf_program__fd(main_prog);
+ if (prog_fd < 0)
+ error(1, 0, "bpf_program__fd");
+
+ prog_array = bpf_object__find_map_by_name(obj, cfg_map_name);
+ if (!prog_array)
+ error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name);
+
+ prog_array_fd = bpf_map__fd(prog_array);
+ if (prog_array_fd < 0)
+ error(1, 0, "bpf_map__fd %s", cfg_map_name);
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ fd = bpf_program__fd(prog);
+ if (fd < 0)
+ error(1, 0, "bpf_program__fd");
+
+ if (fd != prog_fd) {
+ printf("%d: %s\n", i, bpf_program__title(prog, false));
+ bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY);
+ ++i;
+ }
+ }
+
+ ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0);
+ if (ret)
+ error(1, 0, "bpf_prog_attach %s", cfg_path_name);
+
+ ret = bpf_object__pin(obj, cfg_pin_path);
+ if (ret)
+ error(1, 0, "bpf_object__pin %s", cfg_pin_path);
+
+}
+
+static void detach_program(void)
+{
+ char command[64];
+ int ret;
+
+ ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ if (ret)
+ error(1, 0, "bpf_prog_detach");
+
+ /* To unpin, it is necessary and sufficient to just remove this dir */
+ sprintf(command, "rm -r %s", cfg_pin_path);
+ ret = system(command);
+ if (ret)
+ error(1, errno, command);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ bool attach = false;
+ bool detach = false;
+ int c;
+
+ while ((c = getopt(argc, argv, "adp:s:")) != -1) {
+ switch (c) {
+ case 'a':
+ if (detach)
+ error(1, 0, "attach/detach are exclusive");
+ attach = true;
+ break;
+ case 'd':
+ if (attach)
+ error(1, 0, "attach/detach are exclusive");
+ detach = true;
+ break;
+ case 'p':
+ if (cfg_path_name)
+ error(1, 0, "only one prog name can be given");
+
+ cfg_path_name = optarg;
+ break;
+ case 's':
+ if (cfg_section_name)
+ error(1, 0, "only one section can be given");
+
+ cfg_section_name = optarg;
+ break;
+ }
+ }
+
+ if (detach)
+ cfg_attach = false;
+
+ if (cfg_attach && !cfg_path_name)
+ error(1, 0, "must provide a path to the BPF program");
+
+ if (cfg_attach && !cfg_section_name)
+ error(1, 0, "must provide a section name");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ if (cfg_attach)
+ load_and_attach_program();
+ else
+ detach_program();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
new file mode 100644
index 000000000000..12b784afba31
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inject packets with all sorts of encapsulation into the kernel.
+ *
+ * IPv4/IPv6 outer layer 3
+ * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
+ * IPv4/IPv6 inner layer 3
+ */
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <netinet/ip.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define CFG_PORT_INNER 8000
+
+/* Add some protocol definitions that do not exist in userspace */
+
+struct grehdr {
+ uint16_t unused;
+ uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hlen:5,
+ control:1,
+ version:2;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u8 version:2,
+ control:1,
+ hlen:5;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 proto_ctype;
+ __be16 flags;
+ };
+ __be32 word;
+ };
+};
+
+static uint8_t cfg_dsfield_inner;
+static uint8_t cfg_dsfield_outer;
+static uint8_t cfg_encap_proto;
+static bool cfg_expect_failure = false;
+static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */
+static int cfg_l3_inner = AF_UNSPEC;
+static int cfg_l3_outer = AF_UNSPEC;
+static int cfg_num_pkt = 10;
+static int cfg_num_secs = 0;
+static char cfg_payload_char = 'a';
+static int cfg_payload_len = 100;
+static int cfg_port_gue = 6080;
+static bool cfg_only_rx;
+static bool cfg_only_tx;
+static int cfg_src_port = 9;
+
+static char buf[ETH_DATA_LEN];
+
+#define INIT_ADDR4(name, addr4, port) \
+ static struct sockaddr_in name = { \
+ .sin_family = AF_INET, \
+ .sin_port = __constant_htons(port), \
+ .sin_addr.s_addr = __constant_htonl(addr4), \
+ };
+
+#define INIT_ADDR6(name, addr6, port) \
+ static struct sockaddr_in6 name = { \
+ .sin6_family = AF_INET6, \
+ .sin6_port = __constant_htons(port), \
+ .sin6_addr = addr6, \
+ };
+
+INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
+INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
+INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
+INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
+INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
+
+INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
+INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
+
+static unsigned long util_gettime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void util_printaddr(const char *msg, struct sockaddr *addr)
+{
+ unsigned long off = 0;
+ char nbuf[INET6_ADDRSTRLEN];
+
+ switch (addr->sa_family) {
+ case PF_INET:
+ off = __builtin_offsetof(struct sockaddr_in, sin_addr);
+ break;
+ case PF_INET6:
+ off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
+ break;
+ default:
+ error(1, 0, "printaddr: unsupported family %u\n",
+ addr->sa_family);
+ }
+
+ if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
+ sizeof(nbuf)))
+ error(1, errno, "inet_ntop");
+
+ fprintf(stderr, "%s: %s\n", msg, nbuf);
+}
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += start[i];
+
+ return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+ unsigned long sum)
+{
+ sum += add_csum_hword(start, num_u16);
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
+
+static void build_ipv4_header(void *header, uint8_t proto,
+ uint32_t src, uint32_t dst,
+ int payload_len, uint8_t tos)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = tos;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = proto;
+ iph->saddr = src;
+ iph->daddr = dst;
+ iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+ uint16_t val, *ptr = (uint16_t *)ip6h;
+
+ val = ntohs(*ptr);
+ val &= 0xF00F;
+ val |= ((uint16_t) dsfield) << 4;
+ *ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+ struct sockaddr_in6 *src,
+ struct sockaddr_in6 *dst,
+ int payload_len, uint8_t dsfield)
+{
+ struct ipv6hdr *ip6h = header;
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 8;
+ ipv6_set_dsfield(ip6h, dsfield);
+
+ memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+ memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static uint16_t build_udp_v4_csum(const struct iphdr *iph,
+ const struct udphdr *udph,
+ int num_words)
+{
+ unsigned long pseudo_sum;
+ int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */
+
+ pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
+ pseudo_sum += htons(IPPROTO_UDP);
+ pseudo_sum += udph->len;
+ return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
+ const struct udphdr *udph,
+ int num_words)
+{
+ unsigned long pseudo_sum;
+ int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */
+
+ pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
+ pseudo_sum += htons(ip6h->nexthdr);
+ pseudo_sum += ip6h->payload_len;
+ return build_ip_csum((void *) udph, num_words, pseudo_sum);
+}
+
+static void build_udp_header(void *header, int payload_len,
+ uint16_t dport, int family)
+{
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(cfg_src_port);
+ udph->dest = htons(dport);
+ udph->len = htons(len);
+ udph->check = 0;
+ if (family == AF_INET)
+ udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+ udph, len >> 1);
+ else
+ udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+ udph, len >> 1);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+ struct guehdr *gueh = header;
+
+ gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+ struct grehdr *greh = header;
+
+ greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+ if (family == AF_INET)
+ return sizeof(struct iphdr);
+ else
+ return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(void)
+{
+ int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+ int el3_len = 0;
+
+ if (cfg_l3_extra)
+ el3_len = l3_length(cfg_l3_extra);
+
+ /* calculate header offsets */
+ if (cfg_encap_proto) {
+ ol3_len = l3_length(cfg_l3_outer);
+
+ if (cfg_encap_proto == IPPROTO_GRE)
+ ol4_len = sizeof(struct grehdr);
+ else if (cfg_encap_proto == IPPROTO_UDP)
+ ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ }
+
+ il3_len = l3_length(cfg_l3_inner);
+ il4_len = sizeof(struct udphdr);
+
+ if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
+ sizeof(buf))
+ error(1, 0, "packet too large\n");
+
+ /*
+ * Fill packet from inside out, to calculate correct checksums.
+ * But create ip before udp headers, as udp uses ip for pseudo-sum.
+ */
+ memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+ cfg_payload_char, cfg_payload_len);
+
+ /* add zero byte for udp csum padding */
+ buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
+
+ switch (cfg_l3_inner) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP,
+ in_saddr4.sin_addr.s_addr,
+ in_daddr4.sin_addr.s_addr,
+ il4_len + cfg_payload_len,
+ cfg_dsfield_inner);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP,
+ &in_saddr6, &in_daddr6,
+ il4_len + cfg_payload_len,
+ cfg_dsfield_inner);
+ break;
+ }
+
+ build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+ cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
+
+ if (!cfg_encap_proto)
+ return il3_len + il4_len + cfg_payload_len;
+
+ switch (cfg_l3_outer) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len, cfg_encap_proto,
+ out_saddr4.sin_addr.s_addr,
+ out_daddr4.sin_addr.s_addr,
+ ol4_len + il3_len + il4_len + cfg_payload_len,
+ cfg_dsfield_outer);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len, cfg_encap_proto,
+ &out_saddr6, &out_daddr6,
+ ol4_len + il3_len + il4_len + cfg_payload_len,
+ cfg_dsfield_outer);
+ break;
+ }
+
+ switch (cfg_encap_proto) {
+ case IPPROTO_UDP:
+ build_gue_header(buf + el3_len + ol3_len + ol4_len -
+ sizeof(struct guehdr),
+ cfg_l3_inner == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6);
+ build_udp_header(buf + el3_len + ol3_len,
+ sizeof(struct guehdr) + il3_len + il4_len +
+ cfg_payload_len,
+ cfg_port_gue, cfg_l3_outer);
+ break;
+ case IPPROTO_GRE:
+ build_gre_header(buf + el3_len + ol3_len,
+ cfg_l3_inner == PF_INET ? ETH_P_IP
+ : ETH_P_IPV6);
+ break;
+ }
+
+ switch (cfg_l3_extra) {
+ case PF_INET:
+ build_ipv4_header(buf,
+ cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6,
+ extra_saddr4.sin_addr.s_addr,
+ extra_daddr4.sin_addr.s_addr,
+ ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len, 0);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf,
+ cfg_l3_outer == PF_INET ? IPPROTO_IPIP
+ : IPPROTO_IPV6,
+ &extra_saddr6, &extra_daddr6,
+ ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len, 0);
+ break;
+ }
+
+ return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ cfg_payload_len;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(void)
+{
+ int family, fd, ret;
+
+ if (cfg_l3_extra)
+ family = cfg_l3_extra;
+ else if (cfg_l3_outer)
+ family = cfg_l3_outer;
+ else
+ family = cfg_l3_inner;
+
+ fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+ if (fd == -1)
+ error(1, errno, "socket tx");
+
+ if (cfg_l3_extra) {
+ if (cfg_l3_extra == PF_INET)
+ ret = connect(fd, (void *) &extra_daddr4,
+ sizeof(extra_daddr4));
+ else
+ ret = connect(fd, (void *) &extra_daddr6,
+ sizeof(extra_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ } else if (cfg_l3_outer) {
+ /* connect to destination if not encapsulated */
+ if (cfg_l3_outer == PF_INET)
+ ret = connect(fd, (void *) &out_daddr4,
+ sizeof(out_daddr4));
+ else
+ ret = connect(fd, (void *) &out_daddr6,
+ sizeof(out_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ } else {
+ /* otherwise using loopback */
+ if (cfg_l3_inner == PF_INET)
+ ret = connect(fd, (void *) &in_daddr4,
+ sizeof(in_daddr4));
+ else
+ ret = connect(fd, (void *) &in_daddr6,
+ sizeof(in_daddr6));
+ if (ret)
+ error(1, errno, "connect tx");
+ }
+
+ return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(void)
+{
+ int fd, ret;
+
+ fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket rx");
+
+ if (cfg_l3_inner == PF_INET)
+ ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
+ else
+ ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
+ if (ret)
+ error(1, errno, "bind rx");
+
+ return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+ int ret;
+
+ ret = write(fd, pkt, len);
+ if (ret == -1)
+ error(1, errno, "send");
+ if (ret != len)
+ error(1, errno, "send: len (%d < %d)\n", ret, len);
+
+ return 1;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.fd = fd;
+ pfd.events = events;
+
+ ret = poll(&pfd, 1, timeout);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret && !(pfd.revents & POLLIN))
+ error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
+
+ return ret;
+}
+
+static int do_rx(int fd)
+{
+ char rbuf;
+ int ret, num = 0;
+
+ while (1) {
+ ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (rbuf != cfg_payload_char)
+ error(1, 0, "recv: payload mismatch");
+ num++;
+ };
+
+ return num;
+}
+
+static int do_main(void)
+{
+ unsigned long tstop, treport, tcur;
+ int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
+
+ if (!cfg_only_tx)
+ fdr = setup_rx();
+ if (!cfg_only_rx)
+ fdt = setup_tx();
+
+ len = build_packet();
+
+ tcur = util_gettime();
+ treport = tcur + 1000;
+ tstop = tcur + (cfg_num_secs * 1000);
+
+ while (1) {
+ if (!cfg_only_rx)
+ tx += do_tx(fdt, buf, len);
+
+ if (!cfg_only_tx)
+ rx += do_rx(fdr);
+
+ if (cfg_num_secs) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+ if (tcur >= treport) {
+ fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+ tx = 0;
+ rx = 0;
+ treport = tcur + 1000;
+ }
+ } else {
+ if (tx == cfg_num_pkt)
+ break;
+ }
+ }
+
+ /* read straggler packets, if any */
+ if (rx < tx) {
+ tstop = util_gettime() + 100;
+ while (rx < tx) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+
+ do_poll(fdr, POLLIN, tstop - tcur);
+ rx += do_rx(fdr);
+ }
+ }
+
+ fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
+
+ if (fdr != -1 && close(fdr))
+ error(1, errno, "close rx");
+ if (fdt != -1 && close(fdt))
+ error(1, errno, "close tx");
+
+ /*
+ * success (== 0) only if received all packets
+ * unless failure is expected, in which case none must arrive.
+ */
+ if (cfg_expect_failure)
+ return rx != 0;
+ else
+ return rx != tx;
+}
+
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+ fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
+ "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
+ "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
+ "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
+ filepath);
+ exit(1);
+}
+
+static void parse_addr(int family, void *addr, const char *optarg)
+{
+ int ret;
+
+ ret = inet_pton(family, optarg, addr);
+ if (ret == -1)
+ error(1, errno, "inet_pton");
+ if (ret == 0)
+ error(1, 0, "inet_pton: bad string");
+}
+
+static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
+{
+ parse_addr(AF_INET, &addr->sin_addr, optarg);
+}
+
+static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
+{
+ parse_addr(AF_INET6, &addr->sin6_addr, optarg);
+}
+
+static int parse_protocol_family(const char *filepath, const char *optarg)
+{
+ if (!strcmp(optarg, "4"))
+ return PF_INET;
+ if (!strcmp(optarg, "6"))
+ return PF_INET6;
+
+ usage(filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
+ switch (c) {
+ case 'd':
+ if (cfg_l3_outer == AF_UNSPEC)
+ error(1, 0, "-d must be preceded by -o");
+ if (cfg_l3_outer == AF_INET)
+ parse_addr4(&out_daddr4, optarg);
+ else
+ parse_addr6(&out_daddr6, optarg);
+ break;
+ case 'D':
+ if (cfg_l3_inner == AF_UNSPEC)
+ error(1, 0, "-D must be preceded by -i");
+ if (cfg_l3_inner == AF_INET)
+ parse_addr4(&in_daddr4, optarg);
+ else
+ parse_addr6(&in_daddr6, optarg);
+ break;
+ case 'e':
+ if (!strcmp(optarg, "gre"))
+ cfg_encap_proto = IPPROTO_GRE;
+ else if (!strcmp(optarg, "gue"))
+ cfg_encap_proto = IPPROTO_UDP;
+ else if (!strcmp(optarg, "bare"))
+ cfg_encap_proto = IPPROTO_IPIP;
+ else if (!strcmp(optarg, "none"))
+ cfg_encap_proto = IPPROTO_IP; /* == 0 */
+ else
+ usage(argv[0]);
+ break;
+ case 'f':
+ cfg_src_port = strtol(optarg, NULL, 0);
+ break;
+ case 'F':
+ cfg_expect_failure = true;
+ break;
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ if (!strcmp(optarg, "4"))
+ cfg_l3_inner = PF_INET;
+ else if (!strcmp(optarg, "6"))
+ cfg_l3_inner = PF_INET6;
+ else
+ usage(argv[0]);
+ break;
+ case 'l':
+ cfg_payload_len = strtol(optarg, NULL, 0);
+ break;
+ case 'n':
+ cfg_num_pkt = strtol(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_l3_outer = parse_protocol_family(argv[0], optarg);
+ break;
+ case 'O':
+ cfg_l3_extra = parse_protocol_family(argv[0], optarg);
+ break;
+ case 'R':
+ cfg_only_rx = true;
+ break;
+ case 's':
+ if (cfg_l3_outer == AF_INET)
+ parse_addr4(&out_saddr4, optarg);
+ else
+ parse_addr6(&out_saddr6, optarg);
+ break;
+ case 'S':
+ if (cfg_l3_inner == AF_INET)
+ parse_addr4(&in_saddr4, optarg);
+ else
+ parse_addr6(&in_saddr6, optarg);
+ break;
+ case 't':
+ cfg_num_secs = strtol(optarg, NULL, 0);
+ break;
+ case 'T':
+ cfg_only_tx = true;
+ break;
+ case 'x':
+ cfg_dsfield_outer = strtol(optarg, NULL, 0);
+ break;
+ case 'X':
+ cfg_dsfield_inner = strtol(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ if (cfg_only_rx && cfg_only_tx)
+ error(1, 0, "options: cannot combine rx-only and tx-only");
+
+ if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
+ error(1, 0, "options: must specify outer with encap");
+ else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
+ error(1, 0, "options: cannot combine no-encap and outer");
+ else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
+ error(1, 0, "options: cannot combine no-encap and extra");
+
+ if (cfg_l3_inner == AF_UNSPEC)
+ cfg_l3_inner = AF_INET6;
+ if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
+ cfg_encap_proto = IPPROTO_IPV6;
+
+ /* RFC 6040 4.2:
+ * on decap, if outer encountered congestion (CE == 0x3),
+ * but inner cannot encode ECN (NoECT == 0x0), then drop packet.
+ */
+ if (((cfg_dsfield_outer & 0x3) == 0x3) &&
+ ((cfg_dsfield_inner & 0x3) == 0x0))
+ cfg_expect_failure = true;
+}
+
+static void print_opts(void)
+{
+ if (cfg_l3_inner == PF_INET6) {
+ util_printaddr("inner.dest6", (void *) &in_daddr6);
+ util_printaddr("inner.source6", (void *) &in_saddr6);
+ } else {
+ util_printaddr("inner.dest4", (void *) &in_daddr4);
+ util_printaddr("inner.source4", (void *) &in_saddr4);
+ }
+
+ if (!cfg_l3_outer)
+ return;
+
+ fprintf(stderr, "encap proto: %u\n", cfg_encap_proto);
+
+ if (cfg_l3_outer == PF_INET6) {
+ util_printaddr("outer.dest6", (void *) &out_daddr6);
+ util_printaddr("outer.source6", (void *) &out_saddr6);
+ } else {
+ util_printaddr("outer.dest4", (void *) &out_daddr4);
+ util_printaddr("outer.source4", (void *) &out_saddr4);
+ }
+
+ if (!cfg_l3_extra)
+ return;
+
+ if (cfg_l3_outer == PF_INET6) {
+ util_printaddr("extra.dest6", (void *) &extra_daddr6);
+ util_printaddr("extra.source6", (void *) &extra_saddr6);
+ } else {
+ util_printaddr("extra.dest4", (void *) &extra_daddr4);
+ util_printaddr("extra.source4", (void *) &extra_saddr4);
+ }
+
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ print_opts();
+ return do_main();
+}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
new file mode 100755
index 000000000000..c0fb073b5eab
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load BPF flow dissector and verify it correctly dissects traffic
+export TESTNAME=test_flow_dissector
+unmount=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+# This test needs to be run in a network namespace with in_netns.sh. Check if
+# this is the case and run it with in_netns.sh if it is being run in the root
+# namespace.
+if [[ -z $(ip netns identify $$) ]]; then
+ ../net/in_netns.sh "$0" "$@"
+ exit $?
+fi
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+ if (( $? == 0 )); then
+ echo "selftests: $TESTNAME [PASS]";
+ else
+ echo "selftests: $TESTNAME [FAILED]";
+ fi
+
+ set +e
+
+ # Cleanup
+ tc filter del dev lo ingress pref 1337 2> /dev/null
+ tc qdisc del dev lo ingress 2> /dev/null
+ ./flow_dissector_load -d 2> /dev/null
+ if [ $unmount -ne 0 ]; then
+ umount bpffs 2> /dev/null
+ fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+# Mount BPF file system
+if /bin/mount | grep /sys/fs/bpf > /dev/null; then
+ echo "bpffs already mounted"
+else
+ echo "bpffs not mounted. Mounting..."
+ unmount=1
+ /bin/mount bpffs /sys/fs/bpf -t bpf
+fi
+
+# Attach BPF program
+./flow_dissector_load -p bpf_flow.o -s dissect
+
+# Setup
+tc qdisc add dev lo ingress
+
+echo "Testing IPv4..."
+# Drops all IP/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+ udp src_port 9 action drop
+
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -f 10
+
+echo "Testing IPIP..."
+# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 10
+
+echo "Testing IPv4 + GRE..."
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 8
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
+# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
+./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
+ -D 192.168.0.1 -S 1.1.1.1 -f 10
+
+tc filter del dev lo ingress pref 1337
+
+echo "Testing IPv6..."
+# Drops all IPv6/UDP packets coming from port 9
+tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
+ udp src_port 9 action drop
+
+# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 6 -f 8
+# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 6 -f 9 -F
+# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 6 -f 10
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6f54f84144a0..9b552c0fc47d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -580,7 +580,11 @@ static void test_sockmap(int tasks, void *data)
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
- if (err) {
+ if (i < 2 && !err) {
+ printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
+ i, sfd[i]);
+ goto out_sockmap;
+ } else if (i >= 2 && err) {
printf("Failed noprog update sockmap '%i:%i'\n",
i, sfd[i]);
goto out_sockmap;
@@ -741,7 +745,7 @@ static void test_sockmap(int tasks, void *data)
}
/* Test map update elem afterwards fd lives in fd and map_fd */
- for (i = 0; i < 6; i++) {
+ for (i = 2; i < 6; i++) {
err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
if (err) {
printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
@@ -845,7 +849,7 @@ static void test_sockmap(int tasks, void *data)
}
/* Delete the elems without programs */
- for (i = 0; i < 6; i++) {
+ for (i = 2; i < 6; i++) {
err = bpf_map_delete_elem(fd, &i);
if (err) {
printf("Failed delete sockmap %i '%i:%i'\n",
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0ef68204c84b..63a671803ed6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -112,13 +112,13 @@ static void test_pkt_access(void)
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, &retval, &duration);
- CHECK(err || errno || retval, "ipv4",
+ CHECK(err || retval, "ipv4",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
- CHECK(err || errno || retval, "ipv6",
+ CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
bpf_object__close(obj);
@@ -153,14 +153,14 @@ static void test_xdp(void)
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_TX || size != 74 ||
+ CHECK(err || retval != XDP_TX || size != 74 ||
iph->protocol != IPPROTO_IPIP, "ipv4",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_TX || size != 114 ||
+ CHECK(err || retval != XDP_TX || size != 114 ||
iph6->nexthdr != IPPROTO_IPV6, "ipv6",
"err %d errno %d retval %d size %d\n",
err, errno, retval, size);
@@ -185,13 +185,13 @@ static void test_xdp_adjust_tail(void)
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_DROP,
+ CHECK(err || retval != XDP_DROP,
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != XDP_TX || size != 54,
+ CHECK(err || retval != XDP_TX || size != 54,
"ipv6", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
bpf_object__close(obj);
@@ -254,14 +254,14 @@ static void test_l4lb(const char *file)
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+ CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+ CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
@@ -343,14 +343,14 @@ static void test_xdp_noinline(void)
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 1 || size != 54 ||
+ CHECK(err || retval != 1 || size != 54 ||
*magic != MAGIC_VAL, "ipv4",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || errno || retval != 1 || size != 74 ||
+ CHECK(err || retval != 1 || size != 74 ||
*magic != MAGIC_VAL, "ipv6",
"err %d errno %d retval %d size %d magic %x\n",
err, errno, retval, size, *magic);
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
new file mode 100755
index 000000000000..ffcd3953f94c
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_addr.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# add private ipv4 and ipv6 addresses to loopback
+
+readonly V6_INNER='100::a/128'
+readonly V4_INNER='192.168.0.1/32'
+
+if getopts ":s" opt; then
+ readonly SIT_DEV_NAME='sixtofourtest0'
+ readonly V6_SIT='2::/64'
+ readonly V4_SIT='172.17.0.1/32'
+ shift
+fi
+
+fail() {
+ echo "error: $*" 1>&2
+ exit 1
+}
+
+setup() {
+ ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
+ ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
+
+ if [[ -n "${V6_SIT}" ]]; then
+ ip link add "${SIT_DEV_NAME}" type sit remote any local any \
+ || fail 'failed to add sit'
+ ip link set dev "${SIT_DEV_NAME}" up \
+ || fail 'failed to bring sit device up'
+ ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
+ || fail 'failed to setup v6 SIT address'
+ ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
+ || fail 'failed to setup v4 SIT address'
+ fi
+
+ sleep 2 # avoid race causing bind to fail
+}
+
+cleanup() {
+ if [[ -n "${V6_SIT}" ]]; then
+ ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
+ ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
+ ip link del "${SIT_DEV_NAME}"
+ fi
+
+ ip -4 addr del "${V4_INNER}" dev lo
+ ip -6 addr del "${V6_INNER}" dev lo
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
new file mode 100755
index 000000000000..e24949ed3a20
--- /dev/null
+++ b/tools/testing/selftests/bpf/with_tunnels.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# setup tunnels for flow dissection test
+
+readonly SUFFIX="test_$(mktemp -u XXXX)"
+CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
+
+setup() {
+ ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
+ ip link add "gre_${SUFFIX}" type gre ${CONFIG}
+ ip link add "sit_${SUFFIX}" type sit ${CONFIG}
+
+ echo "tunnels before test:"
+ ip tunnel show
+
+ ip link set "ipip_${SUFFIX}" up
+ ip link set "gre_${SUFFIX}" up
+ ip link set "sit_${SUFFIX}" up
+}
+
+
+cleanup() {
+ ip tunnel del "ipip_${SUFFIX}"
+ ip tunnel del "gre_${SUFFIX}"
+ ip tunnel del "sit_${SUFFIX}"
+
+ echo "tunnels after test:"
+ ip tunnel show
+}
+
+trap cleanup EXIT
+
+setup
+"$@"
+exit "$?"
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 95eb3a53c381..adacda50a4b2 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1 +1,2 @@
test_memcontrol
+test_core
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 1c5d2b2a583b..14c9fe284806 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -89,17 +89,28 @@ int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
int cg_read_strcmp(const char *cgroup, const char *control,
const char *expected)
{
- size_t size = strlen(expected) + 1;
+ size_t size;
char *buf;
+ int ret;
+
+ /* Handle the case of comparing against empty string */
+ if (!expected)
+ size = 32;
+ else
+ size = strlen(expected) + 1;
buf = malloc(size);
if (!buf)
return -1;
- if (cg_read(cgroup, control, buf, size))
+ if (cg_read(cgroup, control, buf, size)) {
+ free(buf);
return -1;
+ }
- return strcmp(expected, buf);
+ ret = strcmp(expected, buf);
+ free(buf);
+ return ret;
}
int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
@@ -337,3 +348,24 @@ int is_swap_enabled(void)
return cnt > 1;
}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 1ff6f9f1abdc..9ac8b7958f83 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -40,3 +40,4 @@ extern int get_temp_fd(void);
extern int alloc_pagecache(int fd, size_t size);
extern int alloc_anon(const char *cgroup, void *arg);
extern int is_swap_enabled(void);
+extern int set_oom_adj_score(int pid, int score);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index cf0bddc9d271..28d321ba311b 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -2,6 +2,7 @@
#define _GNU_SOURCE
#include <linux/limits.h>
+#include <linux/oom.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -202,6 +203,36 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
return 0;
}
+static int alloc_anon_noexit(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+
+ if (alloc_anon(cgroup, arg))
+ return -1;
+
+ while (getppid() == ppid)
+ sleep(1);
+
+ return 0;
+}
+
+/*
+ * Wait until processes are killed asynchronously by the OOM killer
+ * If we exceed a timeout, fail.
+ */
+static int cg_test_proc_killed(const char *cgroup)
+{
+ int limit;
+
+ for (limit = 10; limit > 0; limit--) {
+ if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
+ return 0;
+
+ usleep(100000);
+ }
+ return -1;
+}
+
/*
* First, this test creates the following hierarchy:
* A memory.min = 50M, memory.max = 200M
@@ -964,6 +995,177 @@ cleanup:
return ret;
}
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the leaf (but not the parent) were killed.
+ */
+static int test_memcg_oom_group_leaf_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.max", "50M"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.oom.group", "1"))
+ goto cleanup;
+
+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ if (!cg_run(child, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_test_proc_killed(child))
+ goto cleanup;
+
+ if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
+ goto cleanup;
+
+ if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the parent and leaf were killed.
+ */
+static int test_memcg_oom_group_parent_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.max", "80M"))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.oom.group", "1"))
+ goto cleanup;
+
+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+
+ if (!cg_run(child, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_test_proc_killed(child))
+ goto cleanup;
+ if (cg_test_proc_killed(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes were killed except those set with OOM_SCORE_ADJ_MIN
+ */
+static int test_memcg_oom_group_score_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ int safe_pid;
+
+ memcg = cg_name(root, "memcg_test_0");
+
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "50M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.oom.group", "1"))
+ goto cleanup;
+
+ safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+ if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
+ goto cleanup;
+
+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+ goto cleanup;
+
+ if (kill(safe_pid, SIGKILL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (memcg)
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
@@ -978,6 +1180,9 @@ struct memcg_test {
T(test_memcg_oom_events),
T(test_memcg_swap_max),
T(test_memcg_sock),
+ T(test_memcg_oom_group_leaf_events),
+ T(test_memcg_oom_group_parent_events),
+ T(test_memcg_oom_group_score_events),
};
#undef T
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100644
index 000000000000..0150bb2741eb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under heavy
+# MC load. This issue can be overcome by putting the switch to MC-aware mode.
+# This test verifies that UC performance stays intact even as the switch is
+# under MC flood, and therefore that the MC-aware mode is enabled and correctly
+# configured.
+#
+# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
+# at full speed. That makes it impossible to use iperf3 to simply measure the
+# throughput, because many packets (that reach $h3) don't get to the kernel at
+# all even in UDP mode (the situation is even worse in TCP mode, where one can't
+# hope to see more than a couple Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
+# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
+# each gets a different priority and we can use per-prio ethtool counters to
+# measure the throughput. In order to avoid prioritizing unicast traffic, prio
+# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
+# thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large frames.
+# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
+# multicast traffic uses 8K frames.
+#
+# +-----------------------+ +----------------------------------+
+# | H1 | | H2 |
+# | | | unicast --> + $h2.111 |
+# | | | traffic | 192.0.2.129/28 |
+# | multicast | | | e-qos-map 0:1 |
+# | traffic | | | |
+# | $h1 + <----- | | + $h2 |
+# +-----|-----------------+ +--------------|-------------------+
+# | |
+# +-----|-------------------------------------------------|-------------------+
+# | + $swp1 + $swp2 |
+# | | >1Gbps | >1Gbps |
+# | +---|----------------+ +----------|----------------+ |
+# | | + $swp1.1 | | + $swp2.111 | |
+# | | BR1 | SW | BR111 | |
+# | | + $swp3.1 | | + $swp3.111 | |
+# | +---|----------------+ +----------|----------------+ |
+# | \_________________________________________________/ |
+# | | |
+# | + $swp3 |
+# | | 1Gbps bottleneck |
+# | | prio qdisc: {0..7} -> 7 |
+# +------------------------------------|--------------------------------------+
+# |
+# +--|-----------------+
+# | + $h3 H3 |
+# | | |
+# | + $h3.111 |
+# | 192.0.2.130/28 |
+# +--------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_mc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ mtu_set $h1 10000
+}
+
+h1_destroy()
+{
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ mtu_set $h2 10000
+
+ vlan_create $h2 111 v$h2 192.0.2.129/28
+ ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ mtu_set $h3 10000
+
+ vlan_create $h3 111 v$h3 192.0.2.130/28
+}
+
+h3_destroy()
+{
+ vlan_destroy $h3 111
+
+ mtu_restore $h3
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ mtu_set $swp1 10000
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 10000
+
+ ip link set dev $swp3 up
+ mtu_set $swp3 10000
+
+ vlan_create $swp2 111
+ vlan_create $swp3 111
+
+ ethtool -s $swp3 speed 1000 autoneg off
+ tc qdisc replace dev $swp3 root handle 3: \
+ prio bands 8 priomap 7 7 7 7 7 7 7 7
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev br1 up
+ ip link set dev $swp1 master br1
+ ip link set dev $swp3 master br1
+
+ ip link add name br111 type bridge vlan_filtering 0
+ ip link set dev br111 up
+ ip link set dev $swp2.111 master br111
+ ip link set dev $swp3.111 master br111
+}
+
+switch_destroy()
+{
+ ip link del dev br111
+ ip link del dev br1
+
+ tc qdisc del dev $swp3 root handle 3:
+ ethtool -s $swp3 autoneg on
+
+ vlan_destroy $swp3 111
+ vlan_destroy $swp2 111
+
+ mtu_restore $swp3
+ ip link set dev $swp3 down
+
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h3mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h2 192.0.2.130
+}
+
+humanize()
+{
+ local speed=$1; shift
+
+ for unit in bps Kbps Mbps Gbps; do
+ if (($(echo "$speed < 1024" | bc))); then
+ break
+ fi
+
+ speed=$(echo "scale=1; $speed / 1024" | bc)
+ done
+
+ echo "$speed${unit}"
+}
+
+rate()
+{
+ local t0=$1; shift
+ local t1=$1; shift
+ local interval=$1; shift
+
+ echo $((8 * (t1 - t0) / interval))
+}
+
+check_rate()
+{
+ local rate=$1; shift
+ local min=$1; shift
+ local what=$1; shift
+
+ if ((rate > min)); then
+ return 0
+ fi
+
+ echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr
+ return 1
+}
+
+measure_uc_rate()
+{
+ local what=$1; shift
+
+ local interval=10
+ local i
+ local ret=0
+
+ # Dips in performance might cause momentary ingress rate to drop below
+ # 1Gbps. That wouldn't saturate egress and MC would thus get through,
+ # seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+ # average ingress rate to somewhat mitigate this.
+ local min_ingress=2147483648
+
+ mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+ -a own -b $h3mac -t udp -q &
+ sleep 1
+
+ for i in {5..0}; do
+ local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+ local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+ sleep $interval
+ local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+ local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+ local ir=$(rate $u0 $u1 $interval)
+ local er=$(rate $t0 $t1 $interval)
+
+ if check_rate $ir $min_ingress "$what ingress rate"; then
+ break
+ fi
+
+ # Fail the test if we can't get the throughput.
+ if ((i == 0)); then
+ ret=1
+ fi
+ done
+
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait; } 2>/dev/null
+
+ echo $ir $er
+ exit $ret
+}
+
+test_mc_aware()
+{
+ RET=0
+
+ local -a uc_rate
+ uc_rate=($(measure_uc_rate "UC-only"))
+ check_err $? "Could not get high enough UC-only ingress rate"
+ local ucth1=${uc_rate[1]}
+
+ mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+
+ local d0=$(date +%s)
+ local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+ local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+ local -a uc_rate_2
+ uc_rate_2=($(measure_uc_rate "UC+MC"))
+ check_err $? "Could not get high enough UC+MC ingress rate"
+ local ucth2=${uc_rate_2[1]}
+
+ local d1=$(date +%s)
+ local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+ local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+ local deg=$(bc <<< "
+ scale=2
+ ret = 100 * ($ucth1 - $ucth2) / $ucth1
+ if (ret > 0) { ret } else { 0 }
+ ")
+ check_err $(bc <<< "$deg > 10")
+
+ local interval=$((d1 - d0))
+ local mc_ir=$(rate $u0 $u1 $interval)
+ local mc_er=$(rate $t0 $t1 $interval)
+
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait; } 2>/dev/null
+
+ log_test "UC performace under MC overload"
+
+ echo "UC-only throughput $(humanize $ucth1)"
+ echo "UC+MC throughput $(humanize $ucth2)"
+ echo "Degradation $deg %"
+ echo
+ echo "Full report:"
+ echo " UC only:"
+ echo " ingress UC throughput $(humanize ${uc_rate[0]})"
+ echo " egress UC throughput $(humanize ${uc_rate[1]})"
+ echo " UC+MC:"
+ echo " ingress UC throughput $(humanize ${uc_rate_2[0]})"
+ echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
+ echo " ingress MC throughput $(humanize $mc_ir)"
+ echo " egress MC throughput $(humanize $mc_er)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config
new file mode 100644
index 000000000000..4e151f1005b2
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/config
@@ -0,0 +1 @@
+CONFIG_EFIVAR_FS=y
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index ff8feca49746..ad1eeb14fda7 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -18,6 +18,7 @@ TEST_GEN_FILES := \
TEST_PROGS := run.sh
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 1bbb47565c55..4665cdbf1a8d 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -21,11 +21,8 @@ endef
CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
LDLIBS += -lmount -I/usr/include/libmount
-$(BINARIES): ../../../gpio/gpio-utils.o ../../../../usr/include/linux/gpio.h
+$(BINARIES):| khdr
+$(BINARIES): ../../../gpio/gpio-utils.o
../../../gpio/gpio-utils.o:
make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
-
-../../../../usr/include/linux/gpio.h:
- make -C ../../../.. headers_install INSTALL_HDR_PATH=$(shell pwd)/../../../../usr/
-
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 15e6b75fc3a5..a3edb2c8e43d 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -19,7 +19,6 @@
#define KSFT_FAIL 1
#define KSFT_XFAIL 2
#define KSFT_XPASS 3
-/* Treat skip as pass */
#define KSFT_SKIP 4
/* counters */
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 4202139d81d9..5c34752e1cff 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
cr4_cpuid_sync_test
+platform_info_test
set_sregs_test
sync_regs_test
vmx_tsc_adjust_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 03b0f551bedf..ec32dad3c3f0 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -6,7 +6,8 @@ UNAME_M := $(shell uname -m)
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
LIBKVM_x86_64 = lib/x86.c lib/vmx.c
-TEST_GEN_PROGS_x86_64 = set_sregs_test
+TEST_GEN_PROGS_x86_64 = platform_info_test
+TEST_GEN_PROGS_x86_64 += set_sregs_test
TEST_GEN_PROGS_x86_64 += sync_regs_test
TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
@@ -20,7 +21,7 @@ INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
-LDFLAGS += -lpthread
+LDFLAGS += -pthread
# After inclusion, $(OUTPUT) is defined and
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
@@ -37,9 +38,6 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
$(AR) crs $@ $^
-$(LINUX_HDR_PATH):
- make -C $(top_srcdir) headers_install
-
-all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
+all: $(STATIC_LIBS)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
-$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
+$(STATIC_LIBS):| khdr
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index bb5a25fb82c6..3acf9a91704c 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -50,6 +50,7 @@ enum vm_mem_backing_src_type {
};
int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
@@ -108,6 +109,9 @@ void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_events *events);
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value);
const char *exit_reason_str(unsigned int exit_reason);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e9ba389c48db..6fd8c089cafc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -63,6 +63,29 @@ int kvm_check_cap(long cap)
return ret;
}
+/* VM Enable Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VM.
+ */
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+ TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
static void vm_open(struct kvm_vm *vm, int perm)
{
vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -1220,6 +1243,72 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
ret, errno);
}
+/* VCPU Get MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+
+ return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ * msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ memset(&buffer, 0, sizeof(buffer));
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ buffer.entry.data = msr_value;
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+}
+
/* VM VCPU Args Set
*
* Input Args:
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c
new file mode 100644
index 000000000000..3764e7121265
--- /dev/null
+++ b/tools/testing/selftests/kvm/platform_info_test.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 0
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+ uint64_t msr_platform_info;
+
+ for (;;) {
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_SYNC(msr_platform_info);
+ asm volatile ("inc %r11");
+ }
+}
+
+static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+{
+ struct kvm_enable_cap cap = {};
+
+ cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
+ cap.flags = 0;
+ cap.args[0] = (int)enable;
+ vm_enable_cap(vm, &cap);
+}
+
+static void test_msr_platform_info_enabled(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct guest_args args;
+
+ set_msr_platform_info_enabled(vm, true);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ guest_args_read(vm, VCPU_ID, &args);
+ TEST_ASSERT(args.port == GUEST_PORT_SYNC,
+ "Received IO from port other than PORT_HOST_SYNC: %u\n",
+ run->io.port);
+ TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+}
+
+static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ set_msr_platform_info_enabled(vm, false);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *state;
+ int rv;
+ uint64_t msr_platform_info;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
+ if (!rv) {
+ fprintf(stderr,
+ "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+ test_msr_platform_info_disabled(vm);
+ test_msr_platform_info_enabled(vm);
+ vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 17ab36605a8e..0a8e75886224 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -16,8 +16,20 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+top_srcdir ?= ../../../..
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+.PHONY: khdr
+khdr:
+ make ARCH=$(ARCH) -C $(top_srcdir) headers_install
+
+ifdef KSFT_KHDR_INSTALL
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES):| khdr
+endif
+
.ONESHELL:
define RUN_TEST_PRINT_RESULT
TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
index 2fde30191a47..a7e8cd5bb265 100644
--- a/tools/testing/selftests/memory-hotplug/config
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -2,3 +2,4 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTPLUG_SPARSE=y
CONFIG_NOTIFIER_ERROR_INJECTION=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index cccdb2295567..256d82d5fa87 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -15,6 +15,7 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+KSFT_KHDR_INSTALL := 1
include ../lib.mk
$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 0f45633bd634..802b4af18729 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,11 +9,11 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
-IP="ip -netns testns"
+IP="ip -netns ns1"
log_test()
{
@@ -47,8 +47,10 @@ log_test()
setup()
{
set -e
- ip netns add testns
+ ip netns add ns1
$IP link set dev lo up
+ ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@@ -61,7 +63,8 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del testns
+ ip netns del ns1
+ ip netns del ns2 &> /dev/null
}
get_linklocal()
@@ -639,11 +642,14 @@ add_initial_route6()
check_route6()
{
- local pfx="2001:db8:104::/64"
+ local pfx
local expected="$1"
local out
local rc=0
+ set -- $expected
+ pfx=$1
+
out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
[ "${out}" = "${expected}" ] && return 0
@@ -690,28 +696,33 @@ route_setup()
[ "${VERBOSE}" = "1" ] && set -x
set -e
- $IP li add red up type vrf table 101
+ ip netns add ns2
+ ip -netns ns2 link set dev lo up
+ ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+
$IP li add veth1 type veth peer name veth2
$IP li add veth3 type veth peer name veth4
$IP li set veth1 up
$IP li set veth3 up
- $IP li set veth2 vrf red up
- $IP li set veth4 vrf red up
- $IP li add dummy1 type dummy
- $IP li set dummy1 vrf red up
-
- $IP -6 addr add 2001:db8:101::1/64 dev veth1
- $IP -6 addr add 2001:db8:101::2/64 dev veth2
- $IP -6 addr add 2001:db8:103::1/64 dev veth3
- $IP -6 addr add 2001:db8:103::2/64 dev veth4
- $IP -6 addr add 2001:db8:104::1/64 dev dummy1
+ $IP li set veth2 netns ns2 up
+ $IP li set veth4 netns ns2 up
+ ip -netns ns2 li add dummy1 type dummy
+ ip -netns ns2 li set dummy1 up
+ $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
+ $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
$IP addr add 172.16.101.1/24 dev veth1
- $IP addr add 172.16.101.2/24 dev veth2
$IP addr add 172.16.103.1/24 dev veth3
- $IP addr add 172.16.103.2/24 dev veth4
- $IP addr add 172.16.104.1/24 dev dummy1
+
+ ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+ ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+ ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+
+ ip -netns ns2 addr add 172.16.101.2/24 dev veth2
+ ip -netns ns2 addr add 172.16.103.2/24 dev veth4
+ ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
set +ex
}
@@ -944,7 +955,7 @@ ipv6_addr_metric_test()
log_test $rc 0 "Modify metric of address"
# verify prefix route removed on down
- run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
@@ -967,6 +978,77 @@ ipv6_addr_metric_test()
cleanup
}
+ipv6_route_metrics_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 routes with metrics"
+
+ route_setup
+
+ #
+ # single path with metrics
+ #
+ run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400"
+ rc=$?
+ fi
+ log_test $rc 0 "Single path route with mtu metric"
+
+
+ #
+ # multipath via separate routes with metrics
+ #
+ run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400"
+ run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first"
+
+ # second route is coalesced to first to make a multipath route.
+ # MTU of the second path is hidden from display!
+ run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd"
+
+ run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2"
+ if [ $? -eq 0 ]; then
+ check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400"
+ log_test $? 0 " MTU of second leg"
+ fi
+
+ #
+ # multipath with metrics
+ #
+ run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route with mtu metric"
+
+ $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
+ run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1"
+ log_test $? 0 "Using route with mtu metric"
+
+ run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo"
+ log_test $? 2 "Invalid metric (fails metric_convert)"
+
+ route_cleanup
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route()
@@ -1005,11 +1087,15 @@ add_initial_route()
check_route()
{
- local pfx="172.16.104.0/24"
+ local pfx
local expected="$1"
local out
local rc=0
+ set -- $expected
+ pfx=$1
+ [ "${pfx}" = "unreachable" ] && pfx=$2
+
out=$($IP ro ls match ${pfx})
[ "${out}" = "${expected}" ] && return 0
@@ -1319,6 +1405,43 @@ ipv4_addr_metric_test()
cleanup
}
+ipv4_route_metrics_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 route add / append tests"
+
+ route_setup
+
+ run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400"
+ rc=$?
+ fi
+ log_test $rc 0 "Single path route with mtu metric"
+
+
+ run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ rc=$?
+ fi
+ log_test $rc 0 "Multipath route with mtu metric"
+
+ $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
+ run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+ log_test $? 0 "Using route with mtu metric"
+
+ run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
+ log_test $? 2 "Invalid metric (fails metric_convert)"
+
+ route_cleanup
+}
+
+
################################################################################
# usage
@@ -1385,6 +1508,8 @@ do
ipv4_route_test|ipv4_rt) ipv4_route_test;;
ipv6_addr_metric) ipv6_addr_metric_test;;
ipv4_addr_metric) ipv4_addr_metric_test;;
+ ipv6_route_metrics) ipv6_route_metrics_test;;
+ ipv4_route_metrics) ipv4_route_metrics_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
new file mode 100755
index 000000000000..1f8ef0eff862
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="sticky"
+NUM_NETIFS=4
+TEST_MAC=de:ad:be:ef:13:37
+source lib.sh
+
+switch_create()
+{
+ ip link add dev br0 type bridge
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $h1 up
+ ip link set dev $swp1 up
+ ip link set dev $h2 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $h2 down
+ ip link set dev $swp1 down
+ ip link set dev $h1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+ switch_destroy
+}
+
+sticky()
+{
+ bridge fdb add $TEST_MAC dev $swp1 master static sticky
+ check_err $? "Could not add fdb entry"
+ bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky
+ $MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q
+ bridge -j fdb show br br0 brport $swp1\
+ | jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null
+ check_err $? "Did not find FDB record when should"
+
+ log_test "Sticky fdb entry"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ca53b539aa2d..0e73698b2048 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -494,6 +494,14 @@ tc_rule_stats_get()
| jq '.[1].options.actions[].stats.packets'
}
+ethtool_stats_get()
+{
+ local dev=$1; shift
+ local stat=$1; shift
+
+ ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
+}
+
mac_get()
{
local if_name=$1
@@ -541,6 +549,23 @@ forwarding_restore()
sysctl_restore net.ipv4.conf.all.forwarding
}
+declare -A MTU_ORIG
+mtu_set()
+{
+ local dev=$1; shift
+ local mtu=$1; shift
+
+ MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu')
+ ip link set dev $dev mtu $mtu
+}
+
+mtu_restore()
+{
+ local dev=$1; shift
+
+ ip link set dev $dev mtu ${MTU_ORIG["$dev"]}
+}
+
tc_offload_check()
{
local num_netifs=${1:-$NUM_NETIFS}
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
index 55fdcdc78eef..61ae2782388e 100644
--- a/tools/testing/selftests/net/ip_defrag.c
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -23,21 +23,28 @@ static bool cfg_overlap;
static unsigned short cfg_port = 9000;
const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
#define IP4_HLEN (sizeof(struct iphdr))
#define IP6_HLEN (sizeof(struct ip6_hdr))
#define UDP_HLEN (sizeof(struct udphdr))
-static int msg_len;
+/* IPv6 fragment header lenth. */
+#define FRAG_HLEN 8
+
+static int payload_len;
static int max_frag_len;
#define MSG_LEN_MAX 60000 /* Max UDP payload length. */
#define IP4_MF (1u << 13) /* IPv4 MF flag. */
+#define IP6_MF (1) /* IPv6 MF flag. */
+
+#define CSUM_MANGLED_0 (0xffff)
static uint8_t udp_payload[MSG_LEN_MAX];
static uint8_t ip_frame[IP_MAXPACKET];
-static uint16_t ip_id = 0xabcd;
+static uint32_t ip_id = 0xabcd;
static int msg_counter;
static int frag_counter;
static unsigned int seed;
@@ -48,25 +55,25 @@ static void recv_validate_udp(int fd_udp)
ssize_t ret;
static uint8_t recv_buff[MSG_LEN_MAX];
- ret = recv(fd_udp, recv_buff, msg_len, 0);
+ ret = recv(fd_udp, recv_buff, payload_len, 0);
msg_counter++;
if (cfg_overlap) {
if (ret != -1)
- error(1, 0, "recv: expected timeout; got %d; seed = %u",
- (int)ret, seed);
+ error(1, 0, "recv: expected timeout; got %d",
+ (int)ret);
if (errno != ETIMEDOUT && errno != EAGAIN)
- error(1, errno, "recv: expected timeout: %d; seed = %u",
- errno, seed);
+ error(1, errno, "recv: expected timeout: %d",
+ errno);
return; /* OK */
}
if (ret == -1)
- error(1, errno, "recv: msg_len = %d max_frag_len = %d",
- msg_len, max_frag_len);
- if (ret != msg_len)
- error(1, 0, "recv: wrong size: %d vs %d", (int)ret, msg_len);
- if (memcmp(udp_payload, recv_buff, msg_len))
+ error(1, errno, "recv: payload_len = %d max_frag_len = %d",
+ payload_len, max_frag_len);
+ if (ret != payload_len)
+ error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len);
+ if (memcmp(udp_payload, recv_buff, payload_len))
error(1, 0, "recv: wrong data");
}
@@ -92,31 +99,95 @@ static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum)
static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr)
{
uint32_t sum = 0;
+ uint16_t res;
sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src),
- IPPROTO_UDP + (uint32_t)(UDP_HLEN + msg_len));
- sum = raw_checksum((uint8_t *)udp_payload, msg_len, sum);
+ IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len));
+ sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
+ sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+ res = 0xffff & ~sum;
+ if (res)
+ return htons(res);
+ else
+ return CSUM_MANGLED_0;
+}
+
+static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr)
+{
+ uint32_t sum = 0;
+ uint16_t res;
+
+ sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src),
+ IPPROTO_UDP);
+ sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum);
sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
- return htons(0xffff & ~sum);
+ sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
+ res = 0xffff & ~sum;
+ if (res)
+ return htons(res);
+ else
+ return CSUM_MANGLED_0;
}
static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
- struct ip *iphdr, int offset)
+ int offset, bool ipv6)
{
int frag_len;
int res;
+ int payload_offset = offset > 0 ? offset - UDP_HLEN : 0;
+ uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN :
+ ip_frame + IP4_HLEN;
+
+ if (offset == 0) {
+ struct udphdr udphdr;
+ udphdr.source = htons(cfg_port + 1);
+ udphdr.dest = htons(cfg_port);
+ udphdr.len = htons(UDP_HLEN + payload_len);
+ udphdr.check = 0;
+ if (ipv6)
+ udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr);
+ else
+ udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr);
+ memcpy(frag_start, &udphdr, UDP_HLEN);
+ }
- if (msg_len - offset <= max_frag_len) {
- /* This is the last fragment. */
- frag_len = IP4_HLEN + msg_len - offset;
- iphdr->ip_off = htons((offset + UDP_HLEN) / 8);
+ if (ipv6) {
+ struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
+ struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+ if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+ /* This is the last fragment. */
+ frag_len = FRAG_HLEN + payload_len - payload_offset;
+ fraghdr->ip6f_offlg = htons(offset);
+ } else {
+ frag_len = FRAG_HLEN + max_frag_len;
+ fraghdr->ip6f_offlg = htons(offset | IP6_MF);
+ }
+ ip6hdr->ip6_plen = htons(frag_len);
+ if (offset == 0)
+ memcpy(frag_start + UDP_HLEN, udp_payload,
+ frag_len - FRAG_HLEN - UDP_HLEN);
+ else
+ memcpy(frag_start, udp_payload + payload_offset,
+ frag_len - FRAG_HLEN);
+ frag_len += IP6_HLEN;
} else {
- frag_len = IP4_HLEN + max_frag_len;
- iphdr->ip_off = htons((offset + UDP_HLEN) / 8 | IP4_MF);
+ struct ip *iphdr = (struct ip *)ip_frame;
+ if (payload_len - payload_offset <= max_frag_len && offset > 0) {
+ /* This is the last fragment. */
+ frag_len = IP4_HLEN + payload_len - payload_offset;
+ iphdr->ip_off = htons(offset / 8);
+ } else {
+ frag_len = IP4_HLEN + max_frag_len;
+ iphdr->ip_off = htons(offset / 8 | IP4_MF);
+ }
+ iphdr->ip_len = htons(frag_len);
+ if (offset == 0)
+ memcpy(frag_start + UDP_HLEN, udp_payload,
+ frag_len - IP4_HLEN - UDP_HLEN);
+ else
+ memcpy(frag_start, udp_payload + payload_offset,
+ frag_len - IP4_HLEN);
}
- iphdr->ip_len = htons(frag_len);
- memcpy(ip_frame + IP4_HLEN, udp_payload + offset,
- frag_len - IP4_HLEN);
res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
if (res < 0)
@@ -127,10 +198,11 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
frag_counter++;
}
-static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t alen)
+static void send_udp_frags(int fd_raw, struct sockaddr *addr,
+ socklen_t alen, bool ipv6)
{
struct ip *iphdr = (struct ip *)ip_frame;
- struct udphdr udphdr;
+ struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
int res;
int offset;
int frag_len;
@@ -142,31 +214,55 @@ static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t alen)
* Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then
* even fragments (0th, 2nd, etc.) are sent out.
*/
- memset(iphdr, 0, sizeof(*iphdr));
- iphdr->ip_hl = 5;
- iphdr->ip_v = 4;
- iphdr->ip_tos = 0;
- iphdr->ip_id = htons(ip_id++);
- iphdr->ip_ttl = 0x40;
- iphdr->ip_p = IPPROTO_UDP;
- iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
- iphdr->ip_dst = addr4;
- iphdr->ip_sum = 0;
+ if (ipv6) {
+ struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+ ((struct sockaddr_in6 *)addr)->sin6_port = 0;
+ memset(ip6hdr, 0, sizeof(*ip6hdr));
+ ip6hdr->ip6_flow = htonl(6<<28); /* Version. */
+ ip6hdr->ip6_nxt = IPPROTO_FRAGMENT;
+ ip6hdr->ip6_hops = 255;
+ ip6hdr->ip6_src = addr6;
+ ip6hdr->ip6_dst = addr6;
+ fraghdr->ip6f_nxt = IPPROTO_UDP;
+ fraghdr->ip6f_reserved = 0;
+ fraghdr->ip6f_ident = htonl(ip_id++);
+ } else {
+ memset(iphdr, 0, sizeof(*iphdr));
+ iphdr->ip_hl = 5;
+ iphdr->ip_v = 4;
+ iphdr->ip_tos = 0;
+ iphdr->ip_id = htons(ip_id++);
+ iphdr->ip_ttl = 0x40;
+ iphdr->ip_p = IPPROTO_UDP;
+ iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
+ iphdr->ip_dst = addr4;
+ iphdr->ip_sum = 0;
+ }
/* Odd fragments. */
- offset = 0;
- while (offset < msg_len) {
- send_fragment(fd_raw, addr, alen, iphdr, offset);
+ offset = max_frag_len;
+ while (offset < (UDP_HLEN + payload_len)) {
+ send_fragment(fd_raw, addr, alen, offset, ipv6);
offset += 2 * max_frag_len;
}
if (cfg_overlap) {
/* Send an extra random fragment. */
- offset = rand() % (UDP_HLEN + msg_len - 1);
+ offset = rand() % (UDP_HLEN + payload_len - 1);
/* sendto() returns EINVAL if offset + frag_len is too small. */
- frag_len = IP4_HLEN + UDP_HLEN + rand() % 256;
- iphdr->ip_off = htons(offset / 8 | IP4_MF);
- iphdr->ip_len = htons(frag_len);
+ if (ipv6) {
+ struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
+ frag_len = max_frag_len + rand() % 256;
+ /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */
+ frag_len &= ~0x7;
+ fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF);
+ ip6hdr->ip6_plen = htons(frag_len);
+ frag_len += IP6_HLEN;
+ } else {
+ frag_len = IP4_HLEN + UDP_HLEN + rand() % 256;
+ iphdr->ip_off = htons(offset / 8 | IP4_MF);
+ iphdr->ip_len = htons(frag_len);
+ }
res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
if (res < 0)
error(1, errno, "sendto overlap");
@@ -175,48 +271,26 @@ static void send_udp_frags_v4(int fd_raw, struct sockaddr *addr, socklen_t alen)
frag_counter++;
}
- /* Zeroth fragment (UDP header). */
- frag_len = IP4_HLEN + UDP_HLEN;
- iphdr->ip_len = htons(frag_len);
- iphdr->ip_off = htons(IP4_MF);
-
- udphdr.source = htons(cfg_port + 1);
- udphdr.dest = htons(cfg_port);
- udphdr.len = htons(UDP_HLEN + msg_len);
- udphdr.check = 0;
- udphdr.check = udp_checksum(iphdr, &udphdr);
-
- memcpy(ip_frame + IP4_HLEN, &udphdr, UDP_HLEN);
- res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
- if (res < 0)
- error(1, errno, "sendto UDP header");
- if (res != frag_len)
- error(1, 0, "sendto UDP header: %d vs %d", (int)res, frag_len);
- frag_counter++;
-
- /* Even fragments. */
- offset = max_frag_len;
- while (offset < msg_len) {
- send_fragment(fd_raw, addr, alen, iphdr, offset);
+ /* Event fragments. */
+ offset = 0;
+ while (offset < (UDP_HLEN + payload_len)) {
+ send_fragment(fd_raw, addr, alen, offset, ipv6);
offset += 2 * max_frag_len;
}
}
-static void run_test(struct sockaddr *addr, socklen_t alen)
+static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6)
{
- int fd_tx_udp, fd_tx_raw, fd_rx_udp;
+ int fd_tx_raw, fd_rx_udp;
struct timeval tv = { .tv_sec = 0, .tv_usec = 10 * 1000 };
int idx;
+ int min_frag_len = ipv6 ? 1280 : 8;
/* Initialize the payload. */
for (idx = 0; idx < MSG_LEN_MAX; ++idx)
udp_payload[idx] = idx % 256;
/* Open sockets. */
- fd_tx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
- if (fd_tx_udp == -1)
- error(1, errno, "socket tx_udp");
-
fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW);
if (fd_tx_raw == -1)
error(1, errno, "socket tx_raw");
@@ -230,22 +304,21 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
error(1, errno, "setsockopt rcv timeout");
- for (msg_len = 1; msg_len < MSG_LEN_MAX; msg_len += (rand() % 4096)) {
+ for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX;
+ payload_len += (rand() % 4096)) {
if (cfg_verbose)
- printf("msg_len: %d\n", msg_len);
- max_frag_len = addr->sa_family == AF_INET ? 8 : 1280;
- for (; max_frag_len < 1500 && max_frag_len <= msg_len;
- max_frag_len += 8) {
- send_udp_frags_v4(fd_tx_raw, addr, alen);
+ printf("payload_len: %d\n", payload_len);
+ max_frag_len = min_frag_len;
+ do {
+ send_udp_frags(fd_tx_raw, addr, alen, ipv6);
recv_validate_udp(fd_rx_udp);
- }
+ max_frag_len += 8 * (rand() % 8);
+ } while (max_frag_len < (1500 - FRAG_HLEN) && max_frag_len <= payload_len);
}
/* Cleanup. */
if (close(fd_tx_raw))
error(1, errno, "close tx_raw");
- if (close(fd_tx_udp))
- error(1, errno, "close tx_udp");
if (close(fd_rx_udp))
error(1, errno, "close rx_udp");
@@ -265,13 +338,18 @@ static void run_test_v4(void)
addr.sin_port = htons(cfg_port);
addr.sin_addr = addr4;
- run_test((void *)&addr, sizeof(addr));
+ run_test((void *)&addr, sizeof(addr), false /* !ipv6 */);
}
static void run_test_v6(void)
{
- fprintf(stderr, "NOT IMPL.\n");
- exit(1);
+ struct sockaddr_in6 addr = {0};
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = addr6;
+
+ run_test((void *)&addr, sizeof(addr), true /* ipv6 */);
}
static void parse_opts(int argc, char **argv)
@@ -303,6 +381,8 @@ int main(int argc, char **argv)
parse_opts(argc, argv);
seed = time(NULL);
srand(seed);
+ /* Print the seed to track/reproduce potential failures. */
+ printf("seed = %d\n", seed);
if (cfg_do_ipv4)
run_test_v4();
diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh
index 78743adcca9e..3a4042d918f0 100755
--- a/tools/testing/selftests/net/ip_defrag.sh
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -6,23 +6,34 @@
set +x
set -e
-echo "ipv4 defrag"
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null
+ ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null
+ ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 &> /dev/null
+ ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 &> /dev/null
+}
-run_v4() {
-sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null
-sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null
-./ip_defrag -4
+cleanup() {
+ ip netns del "${NETNS}"
}
-export -f run_v4
-./in_netns.sh "run_v4"
+trap cleanup EXIT
+setup
+
+echo "ipv4 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -4
+
echo "ipv4 defrag with overlaps"
-run_v4o() {
-sysctl -w net.ipv4.ipfrag_high_thresh=9000000 &> /dev/null
-sysctl -w net.ipv4.ipfrag_low_thresh=7000000 &> /dev/null
-./ip_defrag -4o
-}
-export -f run_v4o
+ip netns exec "${NETNS}" ./ip_defrag -4o
+
+echo "ipv6 defrag"
+ip netns exec "${NETNS}" ./ip_defrag -6
+
+echo "ipv6 defrag with overlaps"
+ip netns exec "${NETNS}" ./ip_defrag -6o
-./in_netns.sh "run_v4o"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 32a194e3e07a..b9cdb68df4c5 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -6,6 +6,26 @@
#
# Tests currently implemented:
#
+# - pmtu_ipv4
+# Set up two namespaces, A and B, with two paths between them over routers
+# R1 and R2 (also implemented with namespaces), with different MTUs:
+#
+# segment a_r1 segment b_r1 a_r1: 2000
+# .--------------R1--------------. a_r2: 1500
+# A B a_r3: 2000
+# '--------------R2--------------' a_r4: 1400
+# segment a_r2 segment b_r2
+#
+# Check that PMTU exceptions with the correct PMTU are created. Then
+# decrease and increase the MTU of the local link for one of the paths,
+# A to R1, checking that route exception PMTU changes accordingly over
+# this path. Also check that locked exceptions are created when an ICMP
+# message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
+# received
+#
+# - pmtu_ipv6
+# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
+#
# - pmtu_vti4_exception
# Set up vti tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is not
@@ -50,6 +70,8 @@ ksft_skip=4
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
tests="
+ pmtu_ipv4_exception ipv4: PMTU exceptions
+ pmtu_ipv6_exception ipv6: PMTU exceptions
pmtu_vti6_exception vti6: PMTU exceptions
pmtu_vti4_exception vti4: PMTU exceptions
pmtu_vti4_default_mtu vti4: default MTU assignment
@@ -60,8 +82,45 @@ tests="
NS_A="ns-$(mktemp -u XXXXXX)"
NS_B="ns-$(mktemp -u XXXXXX)"
+NS_R1="ns-$(mktemp -u XXXXXX)"
+NS_R2="ns-$(mktemp -u XXXXXX)"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
+ns_r1="ip netns exec ${NS_R1}"
+ns_r2="ip netns exec ${NS_R2}"
+
+# Addressing and routing for tests with routers: four network segments, with
+# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
+# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
+# Addresses are:
+# - IPv4: PREFIX4.SEGMENT.ID (/24)
+# - IPv6: PREFIX6:SEGMENT::ID (/64)
+prefix4="192.168"
+prefix6="fd00"
+a_r1=1
+a_r2=2
+b_r1=3
+b_r2=4
+# ns peer segment
+routing_addrs="
+ A R1 ${a_r1}
+ A R2 ${a_r2}
+ B R1 ${b_r1}
+ B R2 ${b_r2}
+"
+# Traffic from A to B goes through R1 by default, and through R2, if destined to
+# B's address on the b_r2 segment.
+# Traffic from B to A goes through R1.
+# ns destination gateway
+routes="
+ A default ${prefix4}.${a_r1}.2
+ A ${prefix4}.${b_r2}.1 ${prefix4}.${a_r2}.2
+ B default ${prefix4}.${b_r1}.2
+
+ A default ${prefix6}:${a_r1}::2
+ A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2
+ B default ${prefix6}:${b_r1}::2
+"
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
@@ -94,9 +153,15 @@ err_flush() {
err_buf=
}
+# Find the auto-generated name for this namespace
+nsname() {
+ eval echo \$NS_$1
+}
+
setup_namespaces() {
- ip netns add ${NS_A} || return 1
- ip netns add ${NS_B}
+ for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ ip netns add ${n} || return 1
+ done
}
setup_veth() {
@@ -167,6 +232,49 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_routing() {
+ for i in ${NS_R1} ${NS_R2}; do
+ ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
+ ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
+ done
+
+ for i in ${routing_addrs}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${peer}" = "" ] && peer="${i}" && continue
+ [ "${segment}" = "" ] && segment="${i}"
+
+ ns_name="$(nsname ${ns})"
+ peer_name="$(nsname ${peer})"
+ if="veth_${ns}-${peer}"
+ ifpeer="veth_${peer}-${ns}"
+
+ # Create veth links
+ ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
+ ip -n ${peer_name} link set dev ${ifpeer} up
+
+ # Add addresses
+ ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if}
+ ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if}
+
+ ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer}
+ ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}
+
+ ns=""; peer=""; segment=""
+ done
+
+ for i in ${routes}; do
+ [ "${ns}" = "" ] && ns="${i}" && continue
+ [ "${addr}" = "" ] && addr="${i}" && continue
+ [ "${gw}" = "" ] && gw="${i}"
+
+ ns_name="$(nsname ${ns})"
+
+ ip -n ${ns_name} route add ${addr} via ${gw}
+
+ ns=""; addr=""; gw=""
+ done
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
@@ -178,8 +286,9 @@ setup() {
cleanup() {
[ ${cleanup_done} -eq 1 ] && return
- ip netns del ${NS_A} 2 > /dev/null
- ip netns del ${NS_B} 2 > /dev/null
+ for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ ip netns del ${n} 2> /dev/null
+ done
cleanup_done=1
}
@@ -196,7 +305,9 @@ mtu_parse() {
next=0
for i in ${input}; do
+ [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
[ ${next} -eq 1 ] && echo "${i}" && return
+ [ ${next} -eq 2 ] && echo "lock ${i}" && return
[ "${i}" = "mtu" ] && next=1
done
}
@@ -229,6 +340,109 @@ route_get_dst_pmtu_from_exception() {
mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
}
+check_pmtu_value() {
+ expected="${1}"
+ value="${2}"
+ event="${3}"
+
+ [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
+ [ "${value}" = "${expected}" ] && return 0
+ [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1
+ [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1
+ err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
+ return 1
+}
+
+test_pmtu_ipvX() {
+ family=${1}
+
+ setup namespaces routing || return 2
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst1="${prefix4}.${b_r1}.1"
+ dst2="${prefix4}.${b_r2}.1"
+ else
+ ping=${ping6}
+ dst1="${prefix6}:${b_r1}::1"
+ dst2="${prefix6}:${b_r2}::1"
+ fi
+
+ # Set up initial MTU values
+ mtu "${ns_a}" veth_A-R1 2000
+ mtu "${ns_r1}" veth_R1-A 2000
+ mtu "${ns_r1}" veth_R1-B 1400
+ mtu "${ns_b}" veth_B-R1 1400
+
+ mtu "${ns_a}" veth_A-R2 2000
+ mtu "${ns_r2}" veth_R2-A 2000
+ mtu "${ns_r2}" veth_R2-B 1500
+ mtu "${ns_b}" veth_B-R2 1500
+
+ # Create route exceptions
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst1} > /dev/null
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1800 ${dst2} > /dev/null
+
+ # Check that exceptions have been created with the correct PMTU
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+ # Decrease local MTU below PMTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" veth_A-R1 1300
+ mtu "${ns_r1}" veth_R1-A 1300
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
+ # Second exception shouldn't be modified
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+ # Increase MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" veth_A-R1 1700
+ mtu "${ns_r1}" veth_R1-A 1700
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
+ # Second exception shouldn't be modified
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
+
+ # Skip PMTU locking tests for IPv6
+ [ $family -eq 6 ] && return 0
+
+ # Decrease remote MTU on path via R2, get new exception
+ mtu "${ns_r2}" veth_R2-B 400
+ mtu "${ns_b}" veth_B-R2 400
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+
+ # Decrease local MTU below PMTU
+ mtu "${ns_a}" veth_A-R2 500
+ mtu "${ns_r2}" veth_R2-A 500
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1
+
+ # Increase local MTU
+ mtu "${ns_a}" veth_A-R2 1500
+ mtu "${ns_r2}" veth_R2-A 1500
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
+
+ # Get new exception
+ ${ns_a} ${ping} -q -M want -i 0.1 -w 2 -s 1400 ${dst2} > /dev/null
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
+}
+
+test_pmtu_ipv4_exception() {
+ test_pmtu_ipvX 4
+}
+
+test_pmtu_ipv6_exception() {
+ test_pmtu_ipvX 6
+}
+
test_pmtu_vti4_exception() {
setup namespaces veth vti4 xfrm4 || return 2
@@ -248,24 +462,13 @@ test_pmtu_vti4_exception() {
# exception is created
${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
- if [ "${pmtu}" != "" ]; then
- err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
- return 1
- fi
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
# Now exceed link layer MTU by one byte, check that exception is created
+ # with the right PMTU value
${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
- if [ "${pmtu}" = "" ]; then
- err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
- return 1
- fi
-
- # ...with the right PMTU value
- if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
- err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
- return 1
- fi
+ check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
}
test_pmtu_vti6_exception() {
@@ -280,25 +483,18 @@ test_pmtu_vti6_exception() {
${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
# Check that exception was created
- if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
- err " tunnel exceeding link layer MTU didn't create route exception"
- return 1
- fi
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+ check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
# Decrease tunnel MTU, check for PMTU decrease in route exception
mtu "${ns_a}" vti6_a 3000
-
- if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
- err " decreasing tunnel MTU didn't decrease route exception PMTU"
- fail=1
- fi
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+ check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
# Increase tunnel MTU, check for PMTU increase in route exception
mtu "${ns_a}" vti6_a 9000
- if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
- err " increasing tunnel MTU didn't increase route exception PMTU"
- fail=1
- fi
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})"
+ check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
return ${fail}
}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 07daff076ce0..fac68d710f35 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -121,11 +121,11 @@ TEST_F(tls, send_then_sendfile)
buf = (char *)malloc(st.st_size);
EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
- EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+ EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send);
EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
- EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+ EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
}
TEST_F(tls, recv_max)
@@ -160,7 +160,7 @@ TEST_F(tls, msg_more)
EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL),
send_len * 2);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
@@ -180,7 +180,7 @@ TEST_F(tls, sendmsg_single)
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
- EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
@@ -288,7 +288,7 @@ TEST_F(tls, splice_from_pipe)
ASSERT_GE(pipe(p), 0);
EXPECT_GE(write(p[1], mem_send, send_len), 0);
EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
- EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -306,7 +306,7 @@ TEST_F(tls, splice_from_pipe2)
EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
- EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -322,13 +322,13 @@ TEST_F(tls, send_and_splice)
ASSERT_GE(pipe(p), 0);
EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
- EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
EXPECT_GE(write(p[1], mem_send, send_len), send_len);
EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
- EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -436,7 +436,7 @@ TEST_F(tls, multiple_send_single_recv)
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
memset(recv_mem, 0, total_len);
- EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+ EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len);
EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
@@ -502,6 +502,55 @@ TEST_F(tls, recv_peek_multiple)
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
}
+TEST_F(tls, recv_peek_multiple_records)
+{
+ char const *test_str = "test_read_peek_mult_recs";
+ char const *test_str_first = "test_read_peek";
+ char const *test_str_second = "_mult_recs";
+ int len;
+ char buf[64];
+
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+ len = strlen(test_str_first);
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ /* MSG_PEEK can only peek into the current record. */
+ len = strlen(test_str_first);
+ EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
+
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
+
+ /* Non-MSG_PEEK will advance strparser (and therefore record)
+ * however.
+ */
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+
+ /* MSG_MORE will hold current record open, so later MSG_PEEK
+ * will see everything.
+ */
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
TEST_F(tls, recv_peek_large_buf_mult_recs)
{
char const *test_str = "test_read_peek_mult_recs";
@@ -524,6 +573,7 @@ TEST_F(tls, recv_peek_large_buf_mult_recs)
EXPECT_EQ(memcmp(test_str, buf, len), 0);
}
+
TEST_F(tls, pollin)
{
char const *test_str = "test_poll";
@@ -537,7 +587,7 @@ TEST_F(tls, pollin)
EXPECT_EQ(poll(&fd, 1, 20), 1);
EXPECT_EQ(fd.revents & POLLIN, 1);
- EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
/* Test timing out */
EXPECT_EQ(poll(&fd, 1, 20), 0);
}
@@ -555,7 +605,7 @@ TEST_F(tls, poll_wait)
/* Set timeout to inf. secs */
EXPECT_EQ(poll(&fd, 1, -1), 1);
EXPECT_EQ(fd.revents & POLLIN, 1);
- EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len);
}
TEST_F(tls, blocking)
@@ -701,7 +751,7 @@ TEST_F(tls, control_msg)
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
vec.iov_base = buf;
- EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+ EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
cmsg = CMSG_FIRSTHDR(&msg);
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
index a728040edbe1..14cfcf006936 100644
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -5,6 +5,7 @@ TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
all: $(TEST_PROGS)
+top_srcdir = ../../../../..
include ../../lib.mk
clean:
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index 93baacab7693..d056486f49de 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -1,5 +1,6 @@
TEST_GEN_PROGS := copy_first_unaligned alignment_handler
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index b4d7432a0ecd..d40300a65b42 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -4,6 +4,7 @@ TEST_GEN_FILES := exec_target
CFLAGS += -O2
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index 1be547434a49..ede4d3dae750 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -5,6 +5,7 @@ all: $(TEST_PROGS)
$(TEST_PROGS): ../harness.c ../utils.c
+top_srcdir = ../../../../..
include ../../lib.mk
clean:
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 1cf89a34d97c..44574f3818b3 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -17,6 +17,7 @@ TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
EXTRA_SOURCES := validate.c ../harness.c stubs.S
+top_srcdir = ../../../../..
include ../../lib.mk
$(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 55d7db7a616b..5df476364b4d 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -3,6 +3,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
dscr_sysfs_thread_test
+top_srcdir = ../../../../..
include ../../lib.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 0dd3a01fdab9..11a10d7a2bbd 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 8ebbe96d80a8..33ced6e0ad25 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -5,6 +5,7 @@ noarg:
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
TEST_GEN_FILES := tempfile
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 6e1629bf5b09..19046db995fe 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -5,6 +5,7 @@ noarg:
TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
+top_srcdir = ../../../../..
include ../../lib.mk
all: $(TEST_GEN_PROGS) ebb
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c4e64bc2e265..bd5dfa509272 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -17,6 +17,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
lost_exception_test no_handler_test \
cycles_with_mmcr2_test
+top_srcdir = ../../../../../..
include ../../../lib.mk
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
index 175366db7be8..ea2b7bd09e36 100644
--- a/tools/testing/selftests/powerpc/primitives/Makefile
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -2,6 +2,7 @@ CFLAGS += -I$(CURDIR)
TEST_GEN_PROGS := load_unaligned_zeropad
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 28f5b781a553..923d531265f8 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -4,6 +4,7 @@ TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
perf-hwbreak
+top_srcdir = ../../../../..
include ../../lib.mk
all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index a7cbd5082e27..1fca25c6ace0 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -8,6 +8,7 @@ $(TEST_PROGS): ../harness.c ../utils.c signal.S
CFLAGS += -maltivec
signal_tm: CFLAGS += -mhtm
+top_srcdir = ../../../../..
include ../../lib.mk
clean:
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 10b35c87a4f4..7fc0623d85c3 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -29,6 +29,7 @@ endif
ASFLAGS = $(CFLAGS)
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index 30b8ff8fb82e..fcd2dcb8972b 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -5,6 +5,7 @@ ASFLAGS += -O2 -Wall -g -nostdlib -m64
EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
+top_srcdir = ../../../../..
include ../../lib.mk
$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index da22ca7c38c1..161b8846336f 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -2,6 +2,7 @@ TEST_GEN_PROGS := ipc_unmuxed
CFLAGS += -I../../../../../usr/include
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index c0e45d2dde25..9fc2cf6fbc92 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
index f8ced26748f8..fb82068c9fda 100644
--- a/tools/testing/selftests/powerpc/vphn/Makefile
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -2,6 +2,7 @@ TEST_GEN_PROGS := test-vphn
CFLAGS += -m64
+top_srcdir = ../../../../..
include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 642d4e12abea..eec2663261f2 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -56,15 +56,13 @@ unsigned int yield_mod_cnt, nr_abort;
printf(fmt, ## __VA_ARGS__); \
} while (0)
-#if defined(__x86_64__) || defined(__i386__)
+#ifdef __i386__
#define INJECT_ASM_REG "eax"
#define RSEQ_INJECT_CLOBBER \
, INJECT_ASM_REG
-#ifdef __i386__
-
#define RSEQ_INJECT_ASM(n) \
"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
@@ -76,9 +74,16 @@ unsigned int yield_mod_cnt, nr_abort;
#elif defined(__x86_64__)
+#define INJECT_ASM_REG_P "rax"
+#define INJECT_ASM_REG "eax"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG_P \
+ , INJECT_ASM_REG
+
#define RSEQ_INJECT_ASM(n) \
- "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \
- "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
+ "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
+ "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
"jz 333f\n\t" \
"222:\n\t" \
@@ -86,10 +91,6 @@ unsigned int yield_mod_cnt, nr_abort;
"jnz 222b\n\t" \
"333:\n\t"
-#else
-#error "Unsupported architecture"
-#endif
-
#elif defined(__s390__)
#define RSEQ_INJECT_INPUT \
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index 49a6f8c3fdae..f9281e8aa313 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -232,6 +232,8 @@ directory:
and the other is a test whether the command leaked memory or not.
(This one is a preliminary version, it may not work quite right yet,
but the overall template is there and it should only need tweaks.)
+ - buildebpfPlugin.py:
+ builds all programs in $EBPFDIR.
ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000000..dc92eb271d9a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+ $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000000..c32b99b80e19
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+ return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+ s->data = 0x0;
+ return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
new file mode 100644
index 000000000000..9f0ba10c44b4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -0,0 +1,66 @@
+'''
+build ebpf program
+'''
+
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'buildebpf/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.buildebpf:
+ self._ebpf_makeall()
+
+ def post_suite(self, index):
+ super().post_suite(index)
+
+ self._ebpf_makeclean()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+
+ self.argparser_group = self.argparser.add_argument_group(
+ 'buildebpf',
+ 'options for buildebpfPlugin')
+ self.argparser_group.add_argument(
+ '-B', '--buildebpf', action='store_true',
+ help='build eBPF programs')
+
+ return self.argparser
+
+ def _ebpf_makeall(self):
+ if self.args.buildebpf:
+ self._make('all')
+
+ def _ebpf_makeclean(self):
+ if self.args.buildebpf:
+ self._make('clean')
+
+ def _make(self, target):
+ command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 6f289a49e5ec..5970cee6d05f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -55,7 +55,6 @@
"bpf"
],
"setup": [
- "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o",
[
"$TC action flush action bpf",
0,
@@ -63,14 +62,13 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
- "$TC action flush action bpf",
- "rm -f _b.o"
+ "$TC action flush action bpf"
]
},
{
@@ -81,7 +79,6 @@
"bpf"
],
"setup": [
- "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o",
[
"$TC action flush action bpf",
0,
@@ -89,10 +86,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
"expExitCode": "255",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
"matchCount": "0",
"teardown": [
[
@@ -100,8 +97,7 @@
0,
1,
255
- ],
- "rm -f _c.o"
+ ]
]
},
{
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index a023d0d62b25..d651bc1501bd 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -16,7 +16,9 @@ NAMES = {
'DEV2': '',
'BATCH_FILE': './batch.txt',
# Name of the namespace to use
- 'NS': 'tcut'
+ 'NS': 'tcut',
+ # Directory containing eBPF test programs
+ 'EBPFDIR': './bpf'
}
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 9881876d2aa0..e94b7b14bcb2 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -26,10 +26,6 @@ TEST_PROGS := run_vmtests
include ../lib.mk
-$(OUTPUT)/userfaultfd: ../../../../usr/include/linux/kernel.h
$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
-
-../../../../usr/include/linux/kernel.h:
- make -C ../../../.. headers_install
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 235259011704..35edd61d1663 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -17,6 +17,7 @@
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
+#include <limits.h>
#ifndef SYS_getcpu
# ifdef __x86_64__
@@ -31,6 +32,14 @@
int nerrs = 0;
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
getcpu_t vgetcpu;
@@ -95,6 +104,15 @@ static void fill_function_pointers()
printf("Warning: failed to find getcpu in vDSO\n");
vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_clock_gettime)
+ printf("Warning: failed to find clock_gettime in vDSO\n");
+
+ vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gettimeofday)
+ printf("Warning: failed to find gettimeofday in vDSO\n");
+
}
static long sys_getcpu(unsigned * cpu, unsigned * node,
@@ -103,6 +121,16 @@ static long sys_getcpu(unsigned * cpu, unsigned * node,
return syscall(__NR_getcpu, cpu, node, cache);
}
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(__NR_gettimeofday, tv, tz);
+}
+
static void test_getcpu(void)
{
printf("[RUN]\tTesting getcpu...\n");
@@ -155,10 +183,154 @@ static void test_getcpu(void)
}
}
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+ [0] = "CLOCK_REALTIME",
+ [1] = "CLOCK_MONOTONIC",
+ [2] = "CLOCK_PROCESS_CPUTIME_ID",
+ [3] = "CLOCK_THREAD_CPUTIME_ID",
+ [4] = "CLOCK_MONOTONIC_RAW",
+ [5] = "CLOCK_REALTIME_COARSE",
+ [6] = "CLOCK_MONOTONIC_COARSE",
+ [7] = "CLOCK_BOOTTIME",
+ [8] = "CLOCK_REALTIME_ALARM",
+ [9] = "CLOCK_BOOTTIME_ALARM",
+ [10] = "CLOCK_SGI_CYCLE",
+ [11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+ struct timespec start, vdso, end;
+ int vdso_ret, end_ret;
+
+ printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+ if (sys_clock_gettime(clock, &start) < 0) {
+ if (errno == EINVAL) {
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ if (vdso_ret == -EINVAL) {
+ printf("[OK]\tNo such clock.\n");
+ } else {
+ printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+ nerrs++;
+ }
+ } else {
+ printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+ }
+ return;
+ }
+
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ end_ret = sys_clock_gettime(clock, &end);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+ (unsigned long long)start.tv_sec, start.tv_nsec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+ (unsigned long long)end.tv_sec, end.tv_nsec);
+
+ if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+}
+
+static void test_clock_gettime(void)
+{
+ for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+ clock++) {
+ test_one_clock_gettime(clock, clocknames[clock]);
+ }
+
+ /* Also test some invalid clock ids */
+ test_one_clock_gettime(-1, "invalid");
+ test_one_clock_gettime(INT_MIN, "invalid");
+ test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+ struct timeval start, vdso, end;
+ struct timezone sys_tz, vdso_tz;
+ int vdso_ret, end_ret;
+
+ if (!vdso_gettimeofday)
+ return;
+
+ printf("[RUN]\tTesting gettimeofday...\n");
+
+ if (sys_gettimeofday(&start, &sys_tz) < 0) {
+ printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+ nerrs++;
+ return;
+ }
+
+ vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+ end_ret = sys_gettimeofday(&end, NULL);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+ (unsigned long long)start.tv_sec, start.tv_usec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+ (unsigned long long)end.tv_sec, end.tv_usec);
+
+ if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+
+ if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+ sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+ printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+ sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+ } else {
+ printf("[FAIL]\ttimezones do not match\n");
+ nerrs++;
+ }
+
+ /* And make sure that passing NULL for tz doesn't crash. */
+ vdso_gettimeofday(&vdso, NULL);
+}
+
int main(int argc, char **argv)
{
fill_function_pointers();
+ test_clock_gettime();
+ test_gettimeofday();
+
+ /*
+ * Test getcpu() last so that, if something goes wrong setting affinity,
+ * we still run the other tests.
+ */
test_getcpu();
return nerrs ? 1 : 0;